Kinetica C# API Version 7.2.3.1
RecordKeyBuilder.cs
Go to the documentation of this file.
1 using System.Collections.Generic;
2 using System.Text.RegularExpressions;
3 
4 
5 namespace kinetica.Utils;
6 
/// <summary>
/// Builds <see cref="RecordKey"/> objects (and equivalent filter expressions)
/// from records of type <typeparamref name="T"/>, using the primary-key or
/// shard-key columns declared on a <see cref="KineticaType"/>.
/// </summary>
/// <typeparam name="T">The record type; column values are read from its
/// public properties by column name using reflection.</typeparam>
internal sealed class RecordKeyBuilder<T>
{
    /// <summary>
    /// Internal classification of a routing column; it selects which
    /// <see cref="RecordKey"/> "add" method is used for the column's value.
    /// </summary>
    private enum ColumnType
    {
        CHAR1,
        CHAR2,
        CHAR4,
        CHAR8,
        CHAR16,
        CHAR32,
        CHAR64,
        CHAR128,
        CHAR256,
        DATE,
        DATETIME,
        DECIMAL,     // 8-byte decimal (precision <= 18)
        DECIMAL_BIG, // 12-byte decimal (precision > 18)
        DOUBLE,
        FLOAT,
        INT,
        INT8,
        INT16,
        IPV4,
        LONG,
        STRING,
        TIME,
        TIMESTAMP
    } // end enum ColumnType

    /// <summary>
    /// Precision and scale parsed from a decimal column's properties.
    /// </summary>
    private struct DecimalInfo
    {
        public int Precision;
        public int Scale;
    }

    /// <summary>
    /// Matches the "decimal(precision, scale)" property form.  The match is
    /// culture-invariant so that case folding of the letter 'i' does not
    /// depend on the current thread culture.
    /// </summary>
    private static readonly Regex DECIMAL_REGEX = new Regex(
        @"decimal\s*\(\s*(\d+)\s*,\s*(\d+)\s*\)",
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    /// <summary>
    /// String sub-types with a fixed key width, listed in the order in which
    /// they are tested (the first property found on the column wins).
    /// </summary>
    private static readonly (string Property, ColumnType Type, int Size)[] FIXED_STRING_SUBTYPES =
    {
        (ColumnProperty.CHAR1,    ColumnType.CHAR1,    1),
        (ColumnProperty.CHAR2,    ColumnType.CHAR2,    2),
        (ColumnProperty.CHAR4,    ColumnType.CHAR4,    4),
        (ColumnProperty.CHAR8,    ColumnType.CHAR8,    8),
        (ColumnProperty.CHAR16,   ColumnType.CHAR16,   16),
        (ColumnProperty.CHAR32,   ColumnType.CHAR32,   32),
        (ColumnProperty.CHAR64,   ColumnType.CHAR64,   64),
        (ColumnProperty.CHAR128,  ColumnType.CHAR128,  128),
        (ColumnProperty.CHAR256,  ColumnType.CHAR256,  256),
        (ColumnProperty.DATE,     ColumnType.DATE,     4),
        (ColumnProperty.DATETIME, ColumnType.DATETIME, 8),
    };

    // Class members
    private readonly KineticaType ktype;
    private readonly IList<int> routing_column_indices;
    private readonly IList<ColumnType> column_types;           // parallel to routing_column_indices
    private readonly IDictionary<int, DecimalInfo> decimal_infos; // maps column index to precision/scale
    private readonly int buffer_size;                          // total key size in bytes

    /// <summary>
    /// Creates a key builder for either the primary key or the shard key
    /// of the given type.
    /// </summary>
    /// <param name="is_primary_key">If true, columns carrying the primary-key
    /// property are used; otherwise columns carrying the shard-key property
    /// (or the track ID column of a track-type table) are used.</param>
    /// <param name="ktype">The type whose columns are inspected.</param>
    /// <exception cref="System.ArgumentNullException">If
    /// <paramref name="ktype"/> is null.</exception>
    /// <exception cref="KineticaException">If a track-type table declares a
    /// shard key other than 'TRACKID', or if a routing column has a type
    /// that cannot be used in a key.</exception>
    public RecordKeyBuilder(bool is_primary_key, KineticaType ktype)
    {
        if (ktype == null)
            throw new System.ArgumentNullException(nameof(ktype));

        this.ktype = ktype;
        this.routing_column_indices = new List<int>();
        this.column_types = new List<ColumnType>();
        this.decimal_infos = new Dictionary<int, DecimalInfo>();

        // A type that has all of x, y, TIMESTAMP and TRACKID is a track-type
        // table, for which the track ID column is a routing column.
        bool has_timestamp = false;
        bool has_x = false;
        bool has_y = false;
        int track_id_column_idx = -1; // not found yet

        // Only one of the two key properties is relevant for this builder
        string key_property = is_primary_key
            ? ColumnProperty.PRIMARY_KEY
            : ColumnProperty.SHARD_KEY;

        IList<KineticaType.Column> columns = ktype.getColumns();
        for (int i = 0; i < columns.Count; ++i)
        {
            KineticaType.Column column = columns[i];

            // Check if it is one of: x, y, timestamp, track ID
            switch (column.getName())
            {
                case "TRACKID":
                    track_id_column_idx = i;
                    break;

                case "TIMESTAMP":
                    has_timestamp = true;
                    break;

                case "x":
                    has_x = true;
                    break;

                case "y":
                    has_y = true;
                    break;
            } // end switch on column name

            // Remember the column if it is declared as part of the requested key
            if (column.getProperties().Contains(key_property))
            {
                routing_column_indices.Add(i);
            }
        } // end for loop

        // Track-type table: the track ID column is the (only allowed) shard key
        if (!is_primary_key
            && has_timestamp && has_x && has_y && (track_id_column_idx != -1))
        {
            if (routing_column_indices.Count == 0)
            {
                routing_column_indices.Add(track_id_column_idx);
            }
            else if ((routing_column_indices.Count != 1)
                     || (routing_column_indices[0] != track_id_column_idx))
            {
                // Track type tables can't have any other routing key
                throw new KineticaException("Cannot have a shard key other than 'TRACKID' for track tables.");
            }
        } // end if a track type table

        // Classify every routing column and accumulate the key buffer size
        int total_size = 0;
        foreach (int column_index in routing_column_indices)
        {
            total_size += ClassifyRoutingColumn(column_index, columns[column_index]);
        }
        this.buffer_size = total_size;
    } // end constructor RecordKeyBuilder

    /// <summary>
    /// Records the key classification of one routing column in
    /// <c>column_types</c> and returns the number of bytes it occupies
    /// in the key buffer.
    /// </summary>
    /// <exception cref="KineticaException">If the column's primitive type
    /// cannot be used as a key.</exception>
    private int ClassifyRoutingColumn(int column_index, KineticaType.Column column)
    {
        IList<string> properties = column.getProperties();

        switch (column.getType())
        {
            case KineticaType.Column.ColumnType.FLOAT:
                column_types.Add(ColumnType.FLOAT);
                return 4;

            case KineticaType.Column.ColumnType.DOUBLE:
                column_types.Add(ColumnType.DOUBLE);
                return 8;

            case KineticaType.Column.ColumnType.INT:
                // Integer has byte, short and int
                if (properties.Contains(ColumnProperty.INT8))
                {
                    column_types.Add(ColumnType.INT8);
                    return 1;
                }
                if (properties.Contains(ColumnProperty.INT16))
                {
                    column_types.Add(ColumnType.INT16);
                    return 2;
                }
                column_types.Add(ColumnType.INT);
                return 4;

            case KineticaType.Column.ColumnType.LONG:
                // Long has the regular long and timestamp; both take 8 bytes
                column_types.Add(properties.Contains(ColumnProperty.TIMESTAMP)
                    ? ColumnType.TIMESTAMP
                    : ColumnType.LONG);
                return 8;

            case KineticaType.Column.ColumnType.STRING:
                return ClassifyStringColumn(column_index, properties);

            // Other types are not allowed for routing columns.  The default
            // label keeps column_types aligned with routing_column_indices
            // should an unhandled primitive type ever appear.
            case KineticaType.Column.ColumnType.BYTES:
            case KineticaType.Column.ColumnType.DEFAULT:
            default:
                throw new KineticaException($"Cannot use column '{column.getName()}' as a key.");
        } // end switch on the column's primitive data type
    }

    /// <summary>
    /// Classifies a string routing column by its properties, records the
    /// result in <c>column_types</c> (and <c>decimal_infos</c> for decimals)
    /// and returns the key width in bytes.
    /// </summary>
    private int ClassifyStringColumn(int column_index, IList<string> properties)
    {
        // Fixed-width sub-types first, in their defined order
        foreach (var subtype in FIXED_STRING_SUBTYPES)
        {
            if (properties.Contains(subtype.Property))
            {
                column_types.Add(subtype.Type);
                return subtype.Size;
            }
        }

        if (HasDecimalProperty(properties, out int precision, out int scale))
        {
            // Store decimal info for this column
            decimal_infos[column_index] = new DecimalInfo { Precision = precision, Scale = scale };

            // NOTE(review): a bare "decimal" property yields the default
            // precision 19 and therefore takes the 12-byte branch, while the
            // fallback in build() pairs precision 19 with the 8-byte DECIMAL
            // case -- confirm which width is intended for plain decimals.
            if (precision > 18)
            {
                column_types.Add(ColumnType.DECIMAL_BIG);
                return 12;
            }
            column_types.Add(ColumnType.DECIMAL);
            return 8;
        }

        if (properties.Contains(ColumnProperty.IPV4))
        {
            column_types.Add(ColumnType.IPV4);
            return 4;
        }

        if (properties.Contains(ColumnProperty.TIME))
        {
            column_types.Add(ColumnType.TIME);
            return 4;
        }

        // Regular (unrestricted) string
        column_types.Add(ColumnType.STRING);
        return 8;
    }

    /// <summary>
    /// Determines whether a property list marks a column as a decimal and,
    /// if so, extracts its precision and scale.
    /// </summary>
    /// <param name="properties">The column's properties.</param>
    /// <param name="precision">Receives the parsed precision, or 19 when
    /// none is specified.</param>
    /// <param name="scale">Receives the parsed scale, or 4 when none is
    /// specified.</param>
    /// <returns>True if a "decimal", "decimal(p, s)" or "precision=p"
    /// property was found; a "scale=s" property alone does not count.</returns>
    private static bool HasDecimalProperty(IList<string> properties, out int precision, out int scale)
    {
        precision = 19; // Default precision
        scale = 4;      // Default scale
        bool foundDecimal = false;

        foreach (var prop in properties)
        {
            if (prop == null)
                continue; // a null entry cannot describe a decimal

            // Check for "decimal" (simple form)
            if (prop.Equals(ColumnProperty.DECIMAL, System.StringComparison.OrdinalIgnoreCase))
            {
                foundDecimal = true;
                continue;
            }

            // Check for decimal(precision, scale) format
            var match = DECIMAL_REGEX.Match(prop);
            if (match.Success)
            {
                foundDecimal = true;
                if (TryParseInvariant(match.Groups[1].Value, out int p))
                    precision = p;
                if (TryParseInvariant(match.Groups[2].Value, out int s))
                    scale = s;
                continue;
            }

            // Check for precision=X format
            if (prop.StartsWith("precision=", System.StringComparison.OrdinalIgnoreCase))
            {
                if (TryParseInvariant(prop.Substring("precision=".Length), out int p))
                    precision = p;
                foundDecimal = true;
                continue;
            }

            // Check for scale=X format
            if (prop.StartsWith("scale=", System.StringComparison.OrdinalIgnoreCase))
            {
                if (TryParseInvariant(prop.Substring("scale=".Length), out int s))
                    scale = s;
                // Note: scale alone doesn't indicate decimal
            }
        }

        return foundDecimal;
    }

    /// <summary>
    /// Parses an integer independently of the current thread culture.
    /// </summary>
    private static bool TryParseInvariant(string text, out int result)
    {
        return int.TryParse(
            text,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out result);
    }

    /// <summary>
    /// Reads the value of the public property named after a column from a
    /// record, using reflection on the record's runtime type.
    /// </summary>
    /// <exception cref="KineticaException">If the record has no public
    /// property with the column's name.</exception>
    private static object GetColumnValue(T record, string column_name)
    {
        System.Type record_type = record.GetType();
        System.Reflection.PropertyInfo property = record_type.GetProperty(column_name);
        if (property == null)
        {
            throw new KineticaException(
                $"Record type '{record_type.Name}' has no public property named '{column_name}'.");
        }
        return property.GetValue(record, null);
    }

    /// <summary>
    /// Build a RecordKey object based on a record.
    /// </summary>
    /// <param name="record">The record from which the routing column values
    /// are read.</param>
    /// <returns>The key with its hashes computed, or null if this builder
    /// has no routing columns.</returns>
    /// <exception cref="System.ArgumentNullException">If the builder has
    /// routing columns and <paramref name="record"/> is null.</exception>
    /// <exception cref="KineticaException">If the record lacks a property
    /// for one of the routing columns.</exception>
    public RecordKey build(T record)
    {
        // Can't build a key if the buffer size is zero!
        if (this.buffer_size == 0)
            return null;

        if (record == null)
            throw new System.ArgumentNullException(nameof(record));

        // Create the empty key
        RecordKey key = new RecordKey(this.buffer_size);
        IList<KineticaType.Column> columns = this.ktype.getColumns();

        // Add each routing column's value to the key
        for (int i = 0; i < this.routing_column_indices.Count; ++i)
        {
            int column_index = this.routing_column_indices[i];
            object value = GetColumnValue(record, columns[column_index].getName());

            switch (this.column_types[i])
            {
                case ColumnType.CHAR1:
                    key.addCharN((string)value, 1);
                    break;

                case ColumnType.CHAR2:
                    key.addCharN((string)value, 2);
                    break;

                case ColumnType.CHAR4:
                    key.addCharN((string)value, 4);
                    break;

                case ColumnType.CHAR8:
                    key.addCharN((string)value, 8);
                    break;

                case ColumnType.CHAR16:
                    key.addCharN((string)value, 16);
                    break;

                case ColumnType.CHAR32:
                    key.addCharN((string)value, 32);
                    break;

                case ColumnType.CHAR64:
                    key.addCharN((string)value, 64);
                    break;

                case ColumnType.CHAR128:
                    key.addCharN((string)value, 128);
                    break;

                case ColumnType.CHAR256:
                    key.addCharN((string)value, 256);
                    break;

                case ColumnType.DATE:
                    key.addDate((string)value);
                    break;

                case ColumnType.DATETIME:
                    key.addDateTime((string)value);
                    break;

                case ColumnType.DECIMAL:
                case ColumnType.DECIMAL_BIG:
                {
                    // Use the precision/scale captured at construction; fall
                    // back to per-width defaults if none was recorded.
                    if (!this.decimal_infos.TryGetValue(column_index, out DecimalInfo info))
                    {
                        info = (this.column_types[i] == ColumnType.DECIMAL)
                            ? new DecimalInfo { Precision = 19, Scale = 4 }
                            : new DecimalInfo { Precision = 38, Scale = 10 };
                    }
                    key.addDecimal((string)value, info.Precision, info.Scale);
                    break;
                }

                case ColumnType.DOUBLE:
                    key.addDouble((double?)value);
                    break;

                case ColumnType.FLOAT:
                    key.addFloat((float?)value);
                    break;

                case ColumnType.INT:
                    key.addInt((int?)value);
                    break;

                case ColumnType.INT8:
                    key.addInt8((int?)value);
                    break;

                case ColumnType.INT16:
                    key.addInt16((int?)value);
                    break;

                case ColumnType.IPV4:
                    key.addIPv4((string)value);
                    break;

                case ColumnType.LONG:
                    key.addLong((long?)value);
                    break;

                case ColumnType.STRING:
                    key.addString((string)value);
                    break;

                case ColumnType.TIME:
                    key.addTime((string)value);
                    break;

                case ColumnType.TIMESTAMP:
                    key.addTimeStamp((long?)value);
                    break;
            } // end switch
        } // end for loop

        // Compute the hash for the key and return it
        key.computeHashes();
        return key;
    } // end build()

    /// <summary>
    /// Builds a filter expression that matches the routing column values of
    /// the given record: string-like values are double-quoted, numeric
    /// values are written in culture-invariant form, and null values become
    /// <c>is_null(column)</c>.
    /// </summary>
    /// <param name="record">The record from which the routing column values
    /// are read.</param>
    /// <returns>The expression, or null if this builder has no routing
    /// columns.</returns>
    /// <exception cref="System.ArgumentNullException">If the builder has
    /// routing columns and <paramref name="record"/> is null.</exception>
    /// <exception cref="KineticaException">If the record lacks a property
    /// for one of the routing columns.</exception>
    public string buildExpression(T record)
    {
        // Can't build a key if the buffer size is zero!
        if (this.buffer_size == 0)
            return null;

        if (record == null)
            throw new System.ArgumentNullException(nameof(record));

        System.Text.StringBuilder expression = new System.Text.StringBuilder("(");
        IList<KineticaType.Column> columns = this.ktype.getColumns();

        for (int i = 0; i < this.routing_column_indices.Count; ++i)
        {
            if (i > 0) // need a conjunction
                expression.Append(" and ");

            string column_name = columns[this.routing_column_indices[i]].getName();
            object value = GetColumnValue(record, column_name);

            // Handle null values
            if (value == null)
            {
                expression.Append("is_null(").Append(column_name).Append(")");
                continue; // nothing more to do for this column
            }

            expression.Append("(").Append(column_name).Append(" = ");

            switch (this.column_types[i])
            {
                // Need to quote string values
                case ColumnType.CHAR1:
                case ColumnType.CHAR2:
                case ColumnType.CHAR4:
                case ColumnType.CHAR8:
                case ColumnType.CHAR16:
                case ColumnType.CHAR32:
                case ColumnType.CHAR64:
                case ColumnType.CHAR128:
                case ColumnType.CHAR256:
                case ColumnType.DATE:
                case ColumnType.DATETIME:
                case ColumnType.DECIMAL:
                case ColumnType.DECIMAL_BIG:
                case ColumnType.IPV4:
                case ColumnType.STRING:
                case ColumnType.TIME:
                    // NOTE(review): the value is embedded verbatim; a value
                    // containing a double quote would produce a malformed
                    // expression -- confirm the required escaping rule.
                    expression.Append("\"").Append(value).Append("\"");
                    break;

                // Numeric values are written unquoted.  TIMESTAMP is a long
                // and belongs here; without it the right-hand side would be
                // left empty.
                case ColumnType.DOUBLE:
                case ColumnType.FLOAT:
                case ColumnType.INT:
                case ColumnType.INT8:
                case ColumnType.INT16:
                case ColumnType.LONG:
                case ColumnType.TIMESTAMP:
                    // Invariant culture keeps '.' as the decimal separator
                    expression.Append(System.Convert.ToString(
                        value, System.Globalization.CultureInfo.InvariantCulture));
                    break;
            } // end switch

            // Closing parenthesis for the column
            expression.Append(")");
        } // end for loop

        // Final closing parenthesis
        expression.Append(")");

        return expression.ToString();
    } // end buildExpression()

    /// <summary>
    /// Returns whether this builder builds any routing keys.
    /// </summary>
    /// <returns>True if at least one routing column was found.</returns>
    public bool hasKey()
    {
        return this.routing_column_indices.Count > 0;
    }

    /// <summary>
    /// Returns whether <paramref name="other"/> is equivalent to this
    /// builder, i.e. uses the same routing column indices with the same
    /// column classifications, compared element by element.
    /// </summary>
    /// <param name="other">The builder to compare against; null is never
    /// equivalent.</param>
    public bool hasSameKey(RecordKeyBuilder<T> other)
    {
        if (other == null)
            return false;

        if (ReferenceEquals(this, other))
            return true;

        // IList<T>.Equals is reference equality, so compare the contents
        return System.Linq.Enumerable.SequenceEqual(this.routing_column_indices, other.routing_column_indices)
            && System.Linq.Enumerable.SequenceEqual(this.column_types, other.column_types);
    }

} // end class RecordKeyBuilder
const string CHAR64
This property provides optimized memory, disk and query performance for string columns.
const string INT16
This property provides optimized memory and query performance for int columns.
void addCharN(string value, int N)
Appends a charN value to the buffer.
Definition: RecordKey.cs:370
void addInt8(int? value)
Add an 8-bit integer to the buffer.
Definition: RecordKey.cs:193
const string CHAR128
This property provides optimized memory, disk and query performance for string columns.
const string SHARD_KEY
This property indicates that this column will be part of (or the entire) shard key.
void addInt16(int? value)
Add a short (two bytes) to the buffer.
Definition: RecordKey.cs:215
void addDecimal(string value, int precision, int scale)
Adds a decimal value to the buffer with specified precision and scale.
Definition: RecordKey.cs:601
const string CHAR1
This property provides optimized memory, disk and query performance for string columns.
void addDate(string value)
Adds a string to the buffer that has the 'date' property.
Definition: RecordKey.cs:419
void addInt(int? value)
Add an integer to the buffer.
Definition: RecordKey.cs:164
A binary key used for shard routing.
Definition: RecordKey.cs:16
const string TIMESTAMP
Valid only for 'long' columns.
RecordKeyBuilder(bool is_primary_key, KineticaType ktype)
void computeHashes()
Compute the hash of the key in the buffer.
Definition: RecordKey.cs:857
bool hasKey()
Returns whether this builder builds any routing keys.
const string TIME
Valid only for 'string' columns.
const string DECIMAL
Valid only for 'string' columns.
const string IPV4
This property provides optimized memory, disk and query performance for string columns representing IPv4 addresses.
const string CHAR2
This property provides optimized memory, disk and query performance for string columns.
void addTime(string value)
Adds a string to the buffer that has the 'time' property.
Definition: RecordKey.cs:749
const string CHAR8
This property provides optimized memory, disk and query performance for string columns.
const string CHAR32
This property provides optimized memory, disk and query performance for string columns.
const string CHAR256
This property provides optimized memory, disk and query performance for string columns.
const string PRIMARY_KEY
This property indicates that this column will be part of (or the entire) primary key.
RecordKey build(T record)
Build a RecordKey object based on a record.
Column properties used for Kinetica types.
void addString(string value)
Add a string to the buffer.
Definition: RecordKey.cs:341
bool hasSameKey(RecordKeyBuilder< T > other)
Returns if other is equivalent to this builder.
IList< Column > getColumns()
void addIPv4(string value)
Adds a string to the buffer that has the 'ipv4' property.
Definition: RecordKey.cs:686
const string DATETIME
Valid only for 'string' columns.
void addTimeStamp(long? value)
Adds a long to the buffer that has the 'timestamp' property.
Definition: RecordKey.cs:824
void addLong(long? value)
Add a long to the buffer.
Definition: RecordKey.cs:243
void addDouble(double? value)
Add a double to the buffer.
Definition: RecordKey.cs:306
void addDateTime(string value)
Adds a string to the buffer that has the 'datetime' property.
Definition: RecordKey.cs:488
const string INT8
This property provides optimized memory and query performance for int columns.
const string CHAR16
This property provides optimized memory, disk and query performance for string columns.
string buildExpression(T record)
Build an expression to be passed to getRecords in the option.
Builds or creates RecordKey objects based on a given record.
void addFloat(float? value)
Add a float to the buffer.
Definition: RecordKey.cs:276
const string CHAR4
This property provides optimized memory, disk and query performance for string columns.
const string DATE
Valid only for 'string' columns.