Kinetica C# API Version 7.2.3.1
GenericRecordKeyBuilder.cs
Go to the documentation of this file.
1 using System;
2 using System.Collections.Generic;
3 using System.Text;
4 using System.Text.RegularExpressions;
5 
6 namespace kinetica.Utils;
7 
/// <summary>
/// Builds lookup expressions and routing keys for GenericRecord and
/// dictionary-based records, based on the primary-key or shard-key columns
/// of a <see cref="KineticaType"/>.
/// </summary>
internal sealed class GenericRecordKeyBuilder
{
    /// <summary>
    /// Internal classification of a routing column. It selects how a value is
    /// written into the <see cref="RecordKey"/> and how it is rendered in an
    /// expression.
    /// </summary>
    private enum ColumnType
    {
        CHAR1,
        CHAR2,
        CHAR4,
        CHAR8,
        CHAR16,
        CHAR32,
        CHAR64,
        CHAR128,
        CHAR256,
        DATE,
        DATETIME,
        DECIMAL,
        DECIMAL_BIG,
        DOUBLE,
        FLOAT,
        INT,
        INT8,
        INT16,
        IPV4,
        LONG,
        STRING,
        TIME,
        TIMESTAMP,
        BOOLEAN
    }

    /// <summary>Precision and scale recorded for a decimal routing column.</summary>
    private struct DecimalInfo
    {
        public int Precision;
        public int Scale;
    }

    // Defaults used when a decimal column carries no explicit "decimal(p,s)" property.
    private const int DefaultDecimalPrecision = 18;
    private const int DefaultDecimalScale = 4;

    // Fallback precision for a DECIMAL_BIG column with no recorded DecimalInfo.
    private const int DefaultBigDecimalPrecision = 28;

    // Decimals up to this precision take 8 bytes in the key buffer; larger ones take 12.
    private const int MaxSmallDecimalPrecision = 18;

    private static readonly Regex DECIMAL_REGEX = new Regex(@"decimal\s*\(\s*(\d+)\s*,\s*(\d+)\s*\)", RegexOptions.IgnoreCase);

    // All parsing/formatting of key values is machine-readable, never user-facing,
    // so it must not depend on the thread's current culture.
    private static readonly System.Globalization.CultureInfo Invariant = System.Globalization.CultureInfo.InvariantCulture;

    // String sub-types with a fixed key width, in the order they are probed.
    // The first property found on the column wins.
    private static readonly (string Property, ColumnType Type, int Size)[] FixedWidthStringTypes =
    {
        (ColumnProperty.CHAR1, ColumnType.CHAR1, 1),
        (ColumnProperty.CHAR2, ColumnType.CHAR2, 2),
        (ColumnProperty.CHAR4, ColumnType.CHAR4, 4),
        (ColumnProperty.CHAR8, ColumnType.CHAR8, 8),
        (ColumnProperty.CHAR16, ColumnType.CHAR16, 16),
        (ColumnProperty.CHAR32, ColumnType.CHAR32, 32),
        (ColumnProperty.CHAR64, ColumnType.CHAR64, 64),
        (ColumnProperty.CHAR128, ColumnType.CHAR128, 128),
        (ColumnProperty.CHAR256, ColumnType.CHAR256, 256),
        (ColumnProperty.DATE, ColumnType.DATE, 4),
        (ColumnProperty.DATETIME, ColumnType.DATETIME, 8),
        (ColumnProperty.TIME, ColumnType.TIME, 4),
        (ColumnProperty.IPV4, ColumnType.IPV4, 4),
    };

    private readonly KineticaType _ktype;

    // Parallel lists: entry i of each describes the i-th routing column.
    private readonly IList<int> _routingColumnIndices;
    private readonly IList<ColumnType> _columnTypes;
    private readonly IList<string> _routingColumnNames;

    // Keyed by position in the routing-column lists (not by column index in the type).
    private readonly IDictionary<int, DecimalInfo> _decimalInfos;

    // Total number of bytes the routing columns occupy in a RecordKey.
    private int _bufferSize;

    /// <summary>
    /// Creates a GenericRecordKeyBuilder for the given KineticaType.
    /// </summary>
    /// <param name="isPrimaryKey">
    /// <c>true</c> to route on columns carrying the primary-key property;
    /// <c>false</c> to route on columns carrying the shard-key property.
    /// </param>
    /// <param name="ktype">The type whose columns are inspected.</param>
    /// <exception cref="ArgumentNullException"><paramref name="ktype"/> is null.</exception>
    /// <exception cref="KineticaException">
    /// A routing column is a bytes column or has an unknown type, or a track
    /// table declares a shard key other than 'TRACKID'.
    /// </exception>
    public GenericRecordKeyBuilder(bool isPrimaryKey, KineticaType ktype)
    {
        _ktype = ktype ?? throw new ArgumentNullException(nameof(ktype));
        _bufferSize = 0;
        _routingColumnIndices = new List<int>();
        _columnTypes = new List<ColumnType>();
        _decimalInfos = new Dictionary<int, DecimalInfo>();
        _routingColumnNames = new List<string>();

        string routingProperty = isPrimaryKey ? ColumnProperty.PRIMARY_KEY : ColumnProperty.SHARD_KEY;

        // Track-table detection: needs TRACKID, TIMESTAMP, x and y columns.
        bool hasTimestamp = false;
        bool hasX = false;
        bool hasY = false;
        int trackIdColumnIdx = -1;

        IList<KineticaType.Column> columns = ktype.getColumns();
        for (int i = 0; i < columns.Count; ++i)
        {
            KineticaType.Column column = columns[i];

            switch (column.getName())
            {
                case "TRACKID":
                    trackIdColumnIdx = i;
                    break;
                case "TIMESTAMP":
                    hasTimestamp = true;
                    break;
                case "x":
                    hasX = true;
                    break;
                case "y":
                    hasY = true;
                    break;
            }

            if (column.getProperties().Contains(routingProperty))
            {
                _routingColumnIndices.Add(i);
            }
        }

        // For shard keys on a track table, TRACKID is the only permitted key
        // and is used implicitly when no shard key is declared.
        if (!isPrimaryKey && hasTimestamp && hasX && hasY && trackIdColumnIdx != -1)
        {
            if (_routingColumnIndices.Count == 0)
            {
                _routingColumnIndices.Add(trackIdColumnIdx);
            }
            else if (_routingColumnIndices.Count != 1 || _routingColumnIndices[0] != trackIdColumnIdx)
            {
                throw new KineticaException("Cannot have a shard key other than 'TRACKID' for track tables.");
            }
        }

        foreach (int columnIndex in _routingColumnIndices)
        {
            RegisterRoutingColumn(columns[columnIndex]);
        }
    }

    /// <summary>
    /// Records the name, key type and key width of one routing column.
    /// </summary>
    private void RegisterRoutingColumn(KineticaType.Column column)
    {
        _routingColumnNames.Add(column.getName());

        switch (column.getType())
        {
            case KineticaType.Column.ColumnType.FLOAT:
                AddColumnType(ColumnType.FLOAT, 4);
                break;

            case KineticaType.Column.ColumnType.DOUBLE:
                AddColumnType(ColumnType.DOUBLE, 8);
                break;

            case KineticaType.Column.ColumnType.INT:
                if (column.getProperties().Contains(ColumnProperty.INT8))
                {
                    AddColumnType(ColumnType.INT8, 1);
                }
                else if (column.getProperties().Contains(ColumnProperty.INT16))
                {
                    AddColumnType(ColumnType.INT16, 2);
                }
                else
                {
                    AddColumnType(ColumnType.INT, 4);
                }
                break;

            case KineticaType.Column.ColumnType.LONG:
                AddColumnType(
                    column.getProperties().Contains(ColumnProperty.TIMESTAMP) ? ColumnType.TIMESTAMP : ColumnType.LONG,
                    8);
                break;

            case KineticaType.Column.ColumnType.STRING:
                DetermineStringColumnType(column);
                break;

            case KineticaType.Column.ColumnType.BYTES:
                throw new KineticaException("Cannot use bytes column as key.");

            default:
                throw new KineticaException($"Unknown column type: {column.getType()}");
        }
    }

    /// <summary>
    /// Classifies a string-typed routing column by its properties: a
    /// fixed-width sub-type (charN, date, datetime, time, ipv4), a decimal, or
    /// a plain string.
    /// </summary>
    private void DetermineStringColumnType(KineticaType.Column column)
    {
        var properties = column.getProperties();

        foreach (var (property, type, size) in FixedWidthStringTypes)
        {
            if (properties.Contains(property))
            {
                AddColumnType(type, size);
                return;
            }
        }

        // A column is decimal if it has the bare "decimal" property or a
        // parameterised "decimal(p,s)" one. Checking only the bare constant
        // would misclassify a column whose sole decimal marker is "decimal(p,s)".
        // NOTE(review): assumes the parameterised form is stored as a single
        // property string - confirm against KineticaType.
        bool isDecimal = properties.Contains(ColumnProperty.DECIMAL);
        int precision = DefaultDecimalPrecision;
        int scale = DefaultDecimalScale;

        foreach (string property in properties)
        {
            if (property is null)
            {
                continue;
            }

            Match match = DECIMAL_REGEX.Match(property);
            if (match.Success)
            {
                precision = int.Parse(match.Groups[1].Value, Invariant);
                scale = int.Parse(match.Groups[2].Value, Invariant);
                isDecimal = true;
                break;
            }
        }

        if (!isDecimal)
        {
            // Regular string - use 8 bytes for hash
            AddColumnType(ColumnType.STRING, 8);
            return;
        }

        // Must be stored before AddColumnType so the key equals this column's
        // position in the routing-column lists.
        _decimalInfos[_columnTypes.Count] = new DecimalInfo { Precision = precision, Scale = scale };

        if (precision <= MaxSmallDecimalPrecision)
        {
            AddColumnType(ColumnType.DECIMAL, 8);
        }
        else
        {
            AddColumnType(ColumnType.DECIMAL_BIG, 12);
        }
    }

    /// <summary>Appends a routing column type and grows the key buffer size by its width.</summary>
    private void AddColumnType(ColumnType type, int size)
    {
        _columnTypes.Add(type);
        _bufferSize += size;
    }

    /// <summary>
    /// Returns whether this builder has any routing key columns.
    /// </summary>
    public bool HasKey()
    {
        return _routingColumnIndices.Count > 0;
    }

    /// <summary>
    /// Gets the names of the routing key columns.
    /// </summary>
    /// <returns>The builder's internal list of names, in routing-column order.</returns>
    public IList<string> GetRoutingColumnNames()
    {
        return _routingColumnNames;
    }

    /// <summary>
    /// Builds a SQL expression for looking up records by key values.
    /// </summary>
    /// <param name="keyValues">
    /// Values keyed by column name. A missing or null entry produces an
    /// <c>is_null(column)</c> term.
    /// </param>
    /// <returns>
    /// The parenthesised expression with one term per routing column joined
    /// by <c>and</c>, or <c>null</c> when the builder has no routing columns.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="keyValues"/> is null and the builder has routing columns.
    /// </exception>
    public string? BuildExpression(IDictionary<string, object?> keyValues)
    {
        if (_bufferSize == 0)
        {
            return null;
        }

        if (keyValues is null)
        {
            throw new ArgumentNullException(nameof(keyValues));
        }

        IList<KineticaType.Column> columns = _ktype.getColumns();
        var expression = new StringBuilder("(");

        for (int i = 0; i < _routingColumnIndices.Count; ++i)
        {
            if (i > 0)
            {
                expression.Append(" and ");
            }

            string columnName = columns[_routingColumnIndices[i]].getName();
            keyValues.TryGetValue(columnName, out object? value);

            if (value == null)
            {
                expression.Append("is_null(").Append(columnName).Append(")");
                continue;
            }

            expression.Append("(").Append(columnName).Append(" = ");
            AppendLiteral(expression, _columnTypes[i], value);
            expression.Append(")");
        }

        expression.Append(")");
        return expression.ToString();
    }

    /// <summary>
    /// Appends <paramref name="value"/> as a literal for a column of the given
    /// key type: string-like types are single-quoted with embedded quotes
    /// doubled; numeric types are formatted with the invariant culture.
    /// </summary>
    private static void AppendLiteral(StringBuilder expression, ColumnType type, object value)
    {
        switch (type)
        {
            case ColumnType.CHAR1:
            case ColumnType.CHAR2:
            case ColumnType.CHAR4:
            case ColumnType.CHAR8:
            case ColumnType.CHAR16:
            case ColumnType.CHAR32:
            case ColumnType.CHAR64:
            case ColumnType.CHAR128:
            case ColumnType.CHAR256:
            case ColumnType.DATE:
            case ColumnType.DATETIME:
            case ColumnType.IPV4:
            case ColumnType.STRING:
            case ColumnType.TIME:
                // Quote string values and escape single quotes
                string escaped = value.ToString()?.Replace("'", "''") ?? "";
                expression.Append("'").Append(escaped).Append("'");
                break;

            case ColumnType.DECIMAL:
            case ColumnType.DECIMAL_BIG:
                expression.Append(Convert.ToDecimal(value, Invariant).ToString(Invariant));
                break;

            case ColumnType.DOUBLE:
                expression.Append(Convert.ToDouble(value, Invariant).ToString(Invariant));
                break;

            case ColumnType.FLOAT:
                expression.Append(Convert.ToSingle(value, Invariant).ToString(Invariant));
                break;

            default:
                // Numeric types (INT, INT8, INT16, LONG, TIMESTAMP). Format with
                // the invariant culture so that, for example, the negative sign
                // does not vary with the current thread culture.
                expression.Append(value is IFormattable formattable
                    ? formattable.ToString(null, Invariant)
                    : value.ToString());
                break;
        }
    }

    /// <summary>
    /// Builds the binary routing key for the given key values.
    /// </summary>
    /// <param name="keyValues">
    /// Values keyed by column name. A missing entry is treated as null.
    /// </param>
    /// <returns>
    /// The key with its hashes computed, or <c>null</c> when the builder has
    /// no routing columns.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="keyValues"/> is null and the builder has routing columns.
    /// </exception>
    internal RecordKey? Build(IDictionary<string, object?> keyValues)
    {
        if (_bufferSize == 0)
        {
            return null;
        }

        if (keyValues is null)
        {
            throw new ArgumentNullException(nameof(keyValues));
        }

        IList<KineticaType.Column> columns = _ktype.getColumns();
        RecordKey key = new RecordKey(_bufferSize);

        for (int i = 0; i < _routingColumnIndices.Count; ++i)
        {
            string columnName = columns[_routingColumnIndices[i]].getName();
            keyValues.TryGetValue(columnName, out object? value);

            ColumnType type = _columnTypes[i];
            switch (type)
            {
                case ColumnType.CHAR1:
                case ColumnType.CHAR2:
                case ColumnType.CHAR4:
                case ColumnType.CHAR8:
                case ColumnType.CHAR16:
                case ColumnType.CHAR32:
                case ColumnType.CHAR64:
                case ColumnType.CHAR128:
                case ColumnType.CHAR256:
                    key.addCharN(value?.ToString(), GetCharWidth(type));
                    break;

                case ColumnType.DATE:
                    key.addDate(value?.ToString() ?? "");
                    break;

                case ColumnType.DATETIME:
                    key.addDateTime(value?.ToString() ?? "");
                    break;

                case ColumnType.DECIMAL:
                case ColumnType.DECIMAL_BIG:
                {
                    // The same addDecimal call serves both widths; it is given
                    // the precision and scale recorded for this column.
                    DecimalInfo info = GetDecimalInfo(i, type);

                    // addDecimal expects a string representation
                    string? decimalText = value is null
                        ? null
                        : Convert.ToDecimal(value, Invariant).ToString(Invariant);
                    key.addDecimal(decimalText, info.Precision, info.Scale);
                    break;
                }

                case ColumnType.DOUBLE:
                    key.addDouble(value is null ? (double?)null : Convert.ToDouble(value, Invariant));
                    break;

                case ColumnType.FLOAT:
                    key.addFloat(value is null ? (float?)null : Convert.ToSingle(value, Invariant));
                    break;

                case ColumnType.INT:
                    key.addInt(value is null ? (int?)null : Convert.ToInt32(value, Invariant));
                    break;

                case ColumnType.INT8:
                    key.addInt8(value is null ? (int?)null : Convert.ToSByte(value, Invariant));
                    break;

                case ColumnType.INT16:
                    key.addInt16(value is null ? (int?)null : Convert.ToInt16(value, Invariant));
                    break;

                case ColumnType.IPV4:
                    key.addIPv4(value?.ToString() ?? "");
                    break;

                case ColumnType.LONG:
                case ColumnType.TIMESTAMP:
                    key.addLong(value is null ? (long?)null : Convert.ToInt64(value, Invariant));
                    break;

                case ColumnType.STRING:
                    key.addString(value?.ToString() ?? "");
                    break;

                case ColumnType.TIME:
                    key.addTime(value?.ToString() ?? "");
                    break;

                default:
                    // The constructor never registers any other type. Fail
                    // loudly rather than emit a key with a column missing.
                    throw new KineticaException($"Unsupported key column type: {type}");
            }
        }

        key.computeHashes();
        return key;
    }

    /// <summary>Returns the character width N of a CHARn key type.</summary>
    private static int GetCharWidth(ColumnType type)
    {
        switch (type)
        {
            case ColumnType.CHAR1: return 1;
            case ColumnType.CHAR2: return 2;
            case ColumnType.CHAR4: return 4;
            case ColumnType.CHAR8: return 8;
            case ColumnType.CHAR16: return 16;
            case ColumnType.CHAR32: return 32;
            case ColumnType.CHAR64: return 64;
            case ColumnType.CHAR128: return 128;
            case ColumnType.CHAR256: return 256;
            default: throw new ArgumentOutOfRangeException(nameof(type), type, "Not a CHARn column type.");
        }
    }

    /// <summary>
    /// Returns the precision/scale recorded for the routing column at
    /// <paramref name="routingPosition"/>, or the defaults for its type when
    /// none was recorded.
    /// </summary>
    private DecimalInfo GetDecimalInfo(int routingPosition, ColumnType type)
    {
        if (_decimalInfos.TryGetValue(routingPosition, out DecimalInfo info))
        {
            return info;
        }

        return new DecimalInfo
        {
            Precision = type == ColumnType.DECIMAL_BIG ? DefaultBigDecimalPrecision : DefaultDecimalPrecision,
            Scale = DefaultDecimalScale,
        };
    }
}
const string CHAR64
This property provides optimized memory, disk and query performance for string columns.
const string INT16
This property provides optimized memory and query performance for int columns.
void addCharN(string value, int N)
Appends a charN value to the buffer.
Definition: RecordKey.cs:370
void addInt8(int? value)
Add an 8-bit integer to the buffer.
Definition: RecordKey.cs:193
const string CHAR128
This property provides optimized memory, disk and query performance for string columns.
const string SHARD_KEY
This property indicates that this column will be part of (or the entire) shard key.
void addInt16(int? value)
Add a short (two bytes) to the buffer.
Definition: RecordKey.cs:215
void addDecimal(string value, int precision, int scale)
Adds a decimal value to the buffer with specified precision and scale.
Definition: RecordKey.cs:601
const string CHAR1
This property provides optimized memory, disk and query performance for string columns.
void addDate(string value)
Adds a string to the buffer that has the 'date' property.
Definition: RecordKey.cs:419
Builds expressions and routing keys for GenericRecord and dictionary-based records.
void addInt(int? value)
Add an integer to the buffer.
Definition: RecordKey.cs:164
A binary key used for shard routing.
Definition: RecordKey.cs:16
const string TIMESTAMP
Valid only for 'long' columns.
void computeHashes()
Compute the hash of the key in the buffer.
Definition: RecordKey.cs:857
GenericRecordKeyBuilder(bool isPrimaryKey, KineticaType ktype)
Creates a GenericRecordKeyBuilder for the given KineticaType.
const string TIME
Valid only for 'string' columns.
const string DECIMAL
Valid only for 'string' columns.
const string IPV4
This property provides optimized memory, disk and query performance for string columns representing IPv4 addresses.
const string CHAR2
This property provides optimized memory, disk and query performance for string columns.
void addTime(string value)
Adds a string to the buffer that has the 'time' property.
Definition: RecordKey.cs:749
const string CHAR8
This property provides optimized memory, disk and query performance for string columns.
bool HasKey()
Returns whether this builder has any routing key columns.
const string CHAR32
This property provides optimized memory, disk and query performance for string columns.
const string CHAR256
This property provides optimized memory, disk and query performance for string columns.
const string PRIMARY_KEY
This property indicates that this column will be part of (or the entire) primary key.
Column properties used for Kinetica types.
void addString(string value)
Add a string to the buffer.
Definition: RecordKey.cs:341
IList< Column > getColumns()
void addIPv4(string value)
Adds a string to the buffer that has the 'ipv4' property.
Definition: RecordKey.cs:686
const string DATETIME
Valid only for 'string' columns.
void addLong(long? value)
Add a long to the buffer.
Definition: RecordKey.cs:243
string getName()
Returns the name of the column.
Definition: KineticaType.cs:76
void addDouble(double? value)
Add a double to the buffer.
Definition: RecordKey.cs:306
void addDateTime(string value)
Adds a string to the buffer that has the 'datetime' property.
Definition: RecordKey.cs:488
const string INT8
This property provides optimized memory and query performance for int columns.
const string CHAR16
This property provides optimized memory, disk and query performance for string columns.
void addFloat(float? value)
Add a float to the buffer.
Definition: RecordKey.cs:276
const string CHAR4
This property provides optimized memory, disk and query performance for string columns.
const string DATE
Valid only for 'string' columns.
string? BuildExpression(IDictionary< string, object?> keyValues)
Builds a SQL expression for looking up records by key values.
IList< string > GetRoutingColumnNames()
Gets the names of the routing key columns.