Kinetica C# API  Version 6.2.0.1
RecordKey.cs
Go to the documentation of this file.
1 using System;
2 using System.Collections.Generic;
3 using System.Text.RegularExpressions;
4 
5 
6 namespace kinetica.Utils
7 {
13  internal sealed class RecordKey
14  {
/// <summary>
/// Pattern for a date written as YYYY-MM-DD (three unnamed capture groups).
/// </summary>
private static readonly Regex DATE_REGEX = new Regex("\\A(\\d{4})-(\\d{2})-(\\d{2})$");

/// <summary>
/// Pattern for YYYY-MM-DD optionally followed by whitespace and a time of
/// the form H[H]:MM:SS with an optional fraction of one to six digits.
/// </summary>
private static readonly Regex DATETIME_REGEX = new Regex("\\A(?<year>\\d{4})-(?<month>\\d{2})-(?<day>\\d{2})(?<time>\\s+(?<hour>\\d{1,2}):(?<min>\\d{2}):(?<sec>\\d{2})(?:\\.(?<ms>\\d{1,6}))?)?$");

/// <summary>
/// Pattern for a decimal: optional sign, then either an integral part with
/// an optional fraction of up to four digits, or a bare fraction of one to
/// four digits.  Leading and trailing whitespace is tolerated.
/// </summary>
private static readonly Regex DECIMAL_REGEX = new Regex("\\A\\s*(?<sign>[+-]?)((?<int>\\d+)(\\.(?<intfrac>\\d{0,4}))?|\\.(?<onlyfrac>\\d{1,4}))\\s*\\z");

/// <summary>
/// Pattern for a dotted-quad IPv4 address; each group is one to three
/// digits (the numeric range is validated by the caller, not here).
/// </summary>
private static readonly Regex IPV4_REGEX = new Regex("\\A(?<a>\\d{1,3})\\.(?<b>\\d{1,3})\\.(?<c>\\d{1,3})\\.(?<d>\\d{1,3})$");

/// <summary>
/// Pattern for a time written as H[H]:MM:SS with an optional fraction of
/// one to three digits.
/// </summary>
private static readonly Regex TIME_REGEX = new Regex("\\A(?<hour>\\d{1,2}):(?<minute>\\d{2}):(?<seconds>\\d{2})(\\.(?<milliseconds>\\d{1,3}))?$");

/// <summary>Reference instant (1970-01-01) that timestamps are offset from.</summary>
private static readonly DateTime EPOCH_DATE = new DateTime(1970, 1, 1);

/// <summary>Smallest year accepted by the date and datetime encoders.</summary>
private const int MIN_SUPPORTED_YEAR = 1000;

/// <summary>Largest year accepted by the date and datetime encoders.</summary>
private const int MAX_SUPPORTED_YEAR = 2900;

/// <summary>Base year subtracted from the calendar year before bit-packing.</summary>
private const int YEAR_1900 = 1900;

/// <summary>UTC time zone handle (not referenced by any member of this class).</summary>
private static readonly TimeZoneInfo UTC = TimeZoneInfo.Utc;

// Backing storage for the key and its capacity in bytes.
private readonly byte[] buffer;
private readonly int buffer_size;

// Number of bytes written to the buffer so far.
private int current_size;

// 32-bit hash derived from routingHash; assigned by computHashes().
private int hash_code;

// Cleared when any added value fails to parse or validate.
private bool is_valid;

// 64-bit hash of the buffer contents; assigned by computHashes().
private long routingHash;
71 
/// <summary>
/// Creates an empty, valid key backed by a fixed-size byte buffer.
/// </summary>
/// <param name="size">Capacity of the key in bytes; must be at least 1.</param>
/// <exception cref="KineticaException">Thrown when <paramref name="size"/> is less than 1.</exception>
public RecordKey(int size)
{
    if (size <= 0)
    {
        throw new KineticaException("Buffer size must be greater than or equal to 1. Size given: " + size);
    }

    this.buffer = new byte[size];
    this.buffer_size = size;
    this.current_size = 0;

    // A key starts out valid; the add* methods clear this flag on bad input.
    this.is_valid = true;
}
87 
/// <summary>
/// Reports whether every value added so far was accepted.
/// </summary>
/// <returns>The current value of the validity flag.</returns>
public bool isValid() => this.is_valid;
96 
/// <summary>
/// Returns the stored 32-bit hash of this key.
/// </summary>
/// <returns>
/// The value last assigned by <c>computHashes()</c>; the field's default
/// (zero) if that method has not been called.
/// </returns>
public int hashCode() => this.hash_code;
105 
106 
107 
/// <summary>
/// Tests whether the buffer has already been filled to capacity.
/// </summary>
/// <param name="throw_if_full">
/// When true (the default), a full buffer raises an exception instead of
/// returning true.
/// </param>
/// <returns>True if the buffer is full and no exception was requested; false otherwise.</returns>
/// <exception cref="KineticaException">Thrown when the buffer is full and <paramref name="throw_if_full"/> is true.</exception>
private bool isBufferFull(bool throw_if_full = true)
{
    bool full = (this.buffer_size == this.current_size);

    if (full && throw_if_full)
    {
        throw new KineticaException("The buffer is already full!");
    }

    return full;
}
124 
/// <summary>
/// Tests whether appending <paramref name="n"/> more bytes would exceed the
/// buffer's capacity.
/// </summary>
/// <param name="n">Number of bytes about to be appended.  Negative values are not rejected.</param>
/// <param name="throw_if_overflow">
/// When true (the default), an overflow raises an exception instead of
/// returning true.
/// </param>
/// <returns>True if the bytes would not fit and no exception was requested; false if they fit.</returns>
/// <exception cref="KineticaException">Thrown when the bytes would not fit and <paramref name="throw_if_overflow"/> is true.</exception>
private bool willBufferOverflow(int n, bool throw_if_overflow = true)
{
    bool fits = (this.current_size + n) <= this.buffer_size;
    if (fits)
    {
        return false;
    }

    if (throw_if_overflow)
    {
        throw new KineticaException($"The buffer (of size {buffer_size}) does not have sufficient room in it to put {n} more byte(s) (current size is {this.current_size}).");
    }

    return true;
}
147 
148 
/// <summary>
/// Appends a single byte at the current write position and advances it.
/// </summary>
/// <remarks>
/// Capacity is not checked here; callers are expected to have called
/// <c>willBufferOverflow</c> first.
/// </remarks>
/// <param name="b">The byte to append.</param>
private void add(byte b)
{
    // Store through the typed indexer: Array.SetValue takes an object and
    // therefore boxes the byte on every call.
    this.buffer[this.current_size++] = b;
}
160 
161 
162 
/// <summary>
/// Appends a 32-bit integer to the key as four bytes, least-significant
/// byte first.  A null value is encoded as four zero bytes.
/// </summary>
/// <remarks>
/// The bytes are produced with shifts rather than BitConverter.GetBytes,
/// whose byte order follows the host architecture; this keeps the key
/// identical on little- and big-endian hosts.
/// NOTE(review): little-endian is taken to be the layout the key consumer
/// expects (addCharN describes its layout as little endian) -- confirm.
/// </remarks>
/// <param name="value">The integer to append, or null.</param>
/// <exception cref="KineticaException">Thrown when fewer than four bytes remain in the buffer.</exception>
public void addInt(int? value)
{
    // Throws if the four bytes will not fit
    this.willBufferOverflow(4);

    // Null and zero share the same all-zero encoding
    int bits = value ?? 0;

    // Masking keeps each cast in range even under a checked compilation
    for (int shift = 0; shift < 32; shift += 8)
    {
        this.add((byte)((bits >> shift) & 0xFF));
    }
}
190 
191 
/// <summary>
/// Appends an 8-bit integer to the key as one byte.  A null value is
/// encoded as a single zero byte.
/// </summary>
/// <remarks>
/// Only the low eight bits of <paramref name="value"/> are kept.  The mask
/// makes that truncation explicit, so negative values do not raise
/// OverflowException when the assembly is compiled with overflow checking.
/// </remarks>
/// <param name="value">The integer to append, or null.</param>
/// <exception cref="KineticaException">Thrown when the buffer has no room for one more byte.</exception>
public void addInt8(int? value)
{
    // Throws if the single byte will not fit
    this.willBufferOverflow(1);

    // Null and zero share the same encoding
    int bits = value ?? 0;
    this.add((byte)(bits & 0xFF));
}
212 
213 
/// <summary>
/// Appends a 16-bit integer to the key as two bytes, least-significant
/// byte first.  A null value is encoded as two zero bytes.
/// </summary>
/// <remarks>
/// Only the low sixteen bits of <paramref name="value"/> are kept.  The
/// bytes are produced with shifts and masks rather than a (short) cast plus
/// BitConverter.GetBytes, so the result does not depend on the host byte
/// order or on whether the assembly is compiled with overflow checking.
/// </remarks>
/// <param name="value">The integer to append, or null.</param>
/// <exception cref="KineticaException">Thrown when fewer than two bytes remain in the buffer.</exception>
public void addInt16(int? value)
{
    // Throws if the two bytes will not fit
    this.willBufferOverflow(2);

    // Null and zero share the same all-zero encoding
    int bits = value ?? 0;

    this.add((byte)(bits & 0xFF));        // low byte
    this.add((byte)((bits >> 8) & 0xFF)); // high byte
}
239 
240 
241 
/// <summary>
/// Appends a 64-bit integer to the key as eight bytes, least-significant
/// byte first.  A null value is encoded as eight zero bytes.
/// </summary>
/// <remarks>
/// The bytes are produced with shifts rather than BitConverter.GetBytes,
/// whose byte order follows the host architecture; this keeps the key
/// identical on little- and big-endian hosts.
/// </remarks>
/// <param name="value">The integer to append, or null.</param>
/// <exception cref="KineticaException">Thrown when fewer than eight bytes remain in the buffer.</exception>
public void addLong(long? value)
{
    // Throws if the eight bytes will not fit
    this.willBufferOverflow(8);

    // Null and zero share the same all-zero encoding
    long bits = value ?? 0L;

    // Masking keeps each cast in range even under a checked compilation
    for (int shift = 0; shift < 64; shift += 8)
    {
        this.add((byte)((bits >> shift) & 0xFF));
    }
}
273 
274 
/// <summary>
/// Appends a single-precision float to the key as the four bytes of its
/// binary representation, least-significant byte first.  A null value is
/// encoded as four zero bytes (the same bytes as +0.0f).
/// </summary>
/// <remarks>
/// BitConverter.GetBytes returns bytes in host order, so they are reversed
/// on a big-endian host to keep the key identical across architectures.
/// </remarks>
/// <param name="value">The float to append, or null.</param>
/// <exception cref="KineticaException">Thrown when fewer than four bytes remain in the buffer.</exception>
public void addFloat(float? value)
{
    // Throws if the four bytes will not fit
    this.willBufferOverflow(4);

    byte[] encoded = BitConverter.GetBytes(value ?? 0.0f);
    if (!BitConverter.IsLittleEndian)
    {
        Array.Reverse(encoded);
    }

    foreach (byte b in encoded)
    {
        this.add(b);
    }
}
302 
303 
304 
/// <summary>
/// Appends a double-precision float to the key as the eight bytes of its
/// binary representation, least-significant byte first.  A null value is
/// encoded as eight zero bytes (the same bytes as +0.0).
/// </summary>
/// <remarks>
/// BitConverter.GetBytes returns bytes in host order, so they are reversed
/// on a big-endian host to keep the key identical across architectures.
/// </remarks>
/// <param name="value">The double to append, or null.</param>
/// <exception cref="KineticaException">Thrown when fewer than eight bytes remain in the buffer.</exception>
public void addDouble(double? value)
{
    // Throws if the eight bytes will not fit
    this.willBufferOverflow(8);

    byte[] encoded = BitConverter.GetBytes(value ?? 0.0);
    if (!BitConverter.IsLittleEndian)
    {
        Array.Reverse(encoded);
    }

    foreach (byte b in encoded)
    {
        this.add(b);
    }
}
336 
337 
338 
/// <summary>
/// Appends a string to the key as an eight-byte hash of its UTF-8 bytes.
/// A null string is encoded as the long value zero.
/// </summary>
/// <remarks>
/// The hash is MurMurHash3.murmurhash3_x64_128 with seed 10; only the
/// <c>val1</c> member of the resulting pair is stored.
/// </remarks>
/// <param name="value">The string to append, or null.</param>
public void addString(string value)
{
    if (value != null)
    {
        // Hash the UTF-8 form of the text
        MurMurHash3.LongPair digest = new MurMurHash3.LongPair();
        byte[] utf8 = new System.Text.UTF8Encoding().GetBytes(value);
        MurMurHash3.murmurhash3_x64_128(utf8, 0, (uint)utf8.Length, 10, out digest);

        // Only val1 of the pair goes into the key
        this.addLong(digest.val1);
    }
    else
    {
        this.addLong(0L);
    }
}
362 
363 
364 
/// <summary>
/// Appends a fixed-width character value occupying exactly
/// <paramref name="N"/> bytes of the key.
/// </summary>
/// <remarks>
/// The string is UTF-8 encoded and cut to at most N bytes.  The field is
/// written as zero padding first, followed by the kept bytes in reverse
/// order.  A null string produces N zero bytes.
/// </remarks>
/// <param name="value">The string to append, or null.</param>
/// <param name="N">Width of the field in bytes.</param>
/// <exception cref="KineticaException">Thrown when fewer than N bytes remain in the buffer.</exception>
public void addCharN(string value, int N)
{
    // Throws if the N bytes will not fit
    this.willBufferOverflow(N);

    // A null string behaves like one with no encoded bytes: all padding
    byte[] utf8 = (value == null)
        ? new byte[0]
        : System.Text.Encoding.UTF8.GetBytes(value);

    // Longer strings are cut to the first N bytes
    int kept = Math.Min(utf8.Length, N);

    // Zero padding comes first ...
    for (int pad = N - kept; pad > 0; --pad)
    {
        this.add((byte)0);
    }

    // ... followed by the kept bytes, last one first
    for (int idx = kept - 1; idx >= 0; --idx)
    {
        this.add(utf8[idx]);
    }
}
414 
415 
/// <summary>
/// Appends a date given as "YYYY-MM-DD" to the key as a bit-packed 32-bit
/// integer.
/// </summary>
/// <remarks>
/// A null string is encoded as zero and leaves the key valid.  Text that
/// does not match the pattern, is not a real calendar date, or has a year
/// outside [1000, 2900] is encoded as zero and marks the key invalid.
/// Packed layout: (year - 1900) &lt;&lt; 21 | month &lt;&lt; 17 | day &lt;&lt; 12 |
/// day-of-year &lt;&lt; 3 | day-of-week, where day-of-week is the .NET
/// DayOfWeek value plus one.
/// </remarks>
/// <param name="value">The date text, or null.</param>
/// <exception cref="KineticaException">Thrown when the buffer is full or cannot take four more bytes.</exception>
public void addDate(string value)
{
    // Throws right away if no room is left at all
    this.isBufferFull(true);

    if (value == null)
    {
        this.addInt(0);
        return;
    }

    Match parts = DATE_REGEX.Match(value);
    if (!parts.Success)
    {
        // Wrong shape: flag the key, then store a placeholder
        this.is_valid = false;
        this.addInt(0);
        return;
    }

    var gregorian = new System.Globalization.GregorianCalendar();
    int yyyy, mm, dd;
    DateTime parsed;

    try
    {
        yyyy = int.Parse(parts.Groups[1].Value);
        mm = int.Parse(parts.Groups[2].Value);
        dd = int.Parse(parts.Groups[3].Value);

        // Rejects impossible dates such as month 13 or February 30
        parsed = new DateTime(yyyy, mm, dd, gregorian);
    }
    catch (Exception)
    {
        // Any failure: store a placeholder, then flag the key
        this.addInt(0);
        this.is_valid = false;
        return;
    }

    bool year_supported = (yyyy >= MIN_SUPPORTED_YEAR) && (yyyy <= MAX_SUPPORTED_YEAR);
    if (!year_supported)
    {
        this.addInt(0);
        this.is_valid = false;
        return;
    }

    int day_of_year = gregorian.GetDayOfYear(parsed);
    int day_of_week = (int)gregorian.GetDayOfWeek(parsed) + 1;

    // Assemble the packed representation field by field
    int packed = (yyyy - YEAR_1900) << 21;
    packed |= mm << 17;
    packed |= dd << 12;
    packed |= day_of_year << 3;
    packed |= day_of_week;

    this.addInt(packed);
}
483 
484 
/// <summary>
/// Appends a datetime given as "YYYY-MM-DD[ H[H]:MM:SS[.f]]" to the key as
/// a bit-packed 64-bit integer.
/// </summary>
/// <remarks>
/// A null string is encoded as zero and leaves the key valid.  Text that
/// does not match the pattern, is not a real date/time, or has a year
/// outside [1000, 2900] is encoded as zero and marks the key invalid.
/// A fraction of one to six digits is normalised to milliseconds (shorter
/// fractions are scaled up, longer ones truncated).
/// Packed layout: (year - 1900) &lt;&lt; 53 | month &lt;&lt; 49 | day &lt;&lt; 44 |
/// hour &lt;&lt; 39 | minute &lt;&lt; 33 | second &lt;&lt; 27 | millisecond &lt;&lt; 17 |
/// day-of-year &lt;&lt; 8 | day-of-week &lt;&lt; 5, where day-of-week is the .NET
/// DayOfWeek value plus one.
/// </remarks>
/// <param name="value">The datetime text, or null.</param>
/// <exception cref="KineticaException">Thrown when the buffer is full or cannot take eight more bytes.</exception>
public void addDateTime(string value)
{
    // Throws right away if no room is left at all
    this.isBufferFull(true);

    if (value == null)
    {
        this.addLong(0);
        return;
    }

    Match parts = DATETIME_REGEX.Match(value);
    if (!parts.Success)
    {
        // Wrong shape: flag the key, then store a placeholder
        this.is_valid = false;
        this.addLong(0);
        return;
    }

    var gregorian = new System.Globalization.GregorianCalendar();
    int yyyy, mm, dd;
    int hh = 0, mi = 0, ss = 0, millis = 0; // time defaults to midnight
    DateTime parsed;

    try
    {
        yyyy = int.Parse(parts.Groups["year"].Value);
        mm = int.Parse(parts.Groups["month"].Value);
        dd = int.Parse(parts.Groups["day"].Value);

        // The whole time-of-day portion is optional
        if (parts.Groups["time"].Success)
        {
            hh = int.Parse(parts.Groups["hour"].Value);
            mi = int.Parse(parts.Groups["min"].Value);
            ss = int.Parse(parts.Groups["sec"].Value);

            // ... and so is the fraction within it
            Group fraction = parts.Groups["ms"];
            if (fraction.Success)
            {
                millis = int.Parse(fraction.Value);

                // Bring the value to exactly three digits of precision
                int digits = fraction.Value.Length;
                for (; digits < 3; ++digits)
                {
                    millis *= 10;
                }
                for (; digits > 3; --digits)
                {
                    millis /= 10;
                }
            }
        }

        // Rejects impossible field values such as hour 24 or month 13
        parsed = new DateTime(yyyy, mm, dd, hh, mi, ss, millis, gregorian);
    }
    catch (Exception)
    {
        // Any failure: store a placeholder, then flag the key
        this.addLong(0);
        this.is_valid = false;
        return;
    }

    bool year_supported = (yyyy >= MIN_SUPPORTED_YEAR) && (yyyy <= MAX_SUPPORTED_YEAR);
    if (!year_supported)
    {
        this.addLong(0);
        this.is_valid = false;
        return;
    }

    long day_of_year = gregorian.GetDayOfYear(parsed);
    long day_of_week = (int)gregorian.GetDayOfWeek(parsed) + 1;

    // Assemble the packed representation field by field
    long packed = ((long)(yyyy - YEAR_1900)) << 53;
    packed |= ((long)mm) << 49;
    packed |= ((long)dd) << 44;
    packed |= ((long)hh) << 39;
    packed |= ((long)mi) << 33;
    packed |= ((long)ss) << 27;
    packed |= ((long)millis) << 17;
    packed |= day_of_year << 8;
    packed |= day_of_week << 5;

    this.addLong(packed);
}
594 
595 
/// <summary>
/// Appends a decimal given as text to the key as a 64-bit fixed-point
/// integer scaled by 10,000 (four fractional digits).
/// </summary>
/// <remarks>
/// A null string is encoded as zero and leaves the key valid.  Text that
/// does not match the decimal pattern, or whose scaled value does not fit
/// in a long, is encoded as zero and marks the key invalid.
///
/// Every accepted form is scaled the same way, so "12", "12." and
/// "12.0000" all encode as 120000.  Previously a value with no fractional
/// digits was stored unscaled ("12" became 12, the same as "0.0012"),
/// which disagreed with the encoding of the equal value "12.0".
/// </remarks>
/// <param name="value">The decimal text, or null.</param>
/// <exception cref="KineticaException">Thrown when the buffer is full or cannot take eight more bytes.</exception>
public void addDecimal(string value)
{
    // Throws right away if no room is left at all
    this.isBufferFull(true);

    // Handle nulls
    if (value == null)
    {
        this.addLong(0L);
        return;
    }

    // Check that the given value matches the decimal pattern
    Match match = DECIMAL_REGEX.Match(value);
    if (!match.Success)
    {
        // No match, so the key is invalid
        this.is_valid = false;
        this.addLong(0L);
        return;
    }

    long decimal_value;
    try
    {
        Group integral_group = match.Groups["int"];
        Group fraction_with_integral_group = match.Groups["intfrac"];
        Group frac_only_group = match.Groups["onlyfrac"];

        // The pattern guarantees one of these; kept as a defensive check
        if (!integral_group.Success && !frac_only_group.Success)
        {
            throw new KineticaException("No match for decimal!");
        }

        // Integral part (absent for input such as ".5")
        long integral = integral_group.Success ? long.Parse(integral_group.Value) : 0L;

        // Fractional digits come from whichever fraction group matched;
        // they may be absent ("12") or empty ("12.")
        string fraction_digits = "";
        if (integral_group.Success)
        {
            if (fraction_with_integral_group.Success)
            {
                fraction_digits = fraction_with_integral_group.Value;
            }
        }
        else
        {
            fraction_digits = frac_only_group.Value;
        }

        long fraction = (fraction_digits.Length > 0) ? long.Parse(fraction_digits) : 0L;

        // Right-pad the fraction to exactly four digits ("5" -> 5000)
        for (int digits = fraction_digits.Length; digits < 4; ++digits)
        {
            fraction *= 10;
        }

        // Combine with overflow checking so an out-of-range value lands in
        // the catch block instead of silently wrapping around
        checked
        {
            decimal_value = (integral * 10000L) + fraction;
        }

        // The sign group always participates; only "-" needs action
        if (match.Groups["sign"].Value == "-")
        {
            decimal_value = -decimal_value;
        }
    }
    catch (Exception)
    {
        // Upon any error, store a placeholder and mark the key invalid
        this.addLong(0L);
        this.is_valid = false;
        return;
    }

    // Store the scaled fixed-point value
    this.addLong(decimal_value);
}
698 
699 
/// <summary>
/// Appends a dotted-quad IPv4 address to the key as a 32-bit integer with
/// the first octet in the most significant byte.
/// </summary>
/// <remarks>
/// A null string is encoded as zero and leaves the key valid.  Text that
/// does not match the pattern, or has any octet above 255, is encoded as
/// zero and marks the key invalid.
/// </remarks>
/// <param name="value">The address text, or null.</param>
/// <exception cref="KineticaException">Thrown when the buffer is full or cannot take four more bytes.</exception>
public void addIPv4(string value)
{
    // Throws right away if no room is left at all
    this.isBufferFull(true);

    if (value == null)
    {
        this.addInt(0);
        return;
    }

    Match parts = IPV4_REGEX.Match(value);
    if (!parts.Success)
    {
        // Wrong shape: flag the key, then store a placeholder
        this.is_valid = false;
        this.addInt(0);
        return;
    }

    string[] group_names = { "a", "b", "c", "d" };
    int[] octets = new int[group_names.Length];

    try
    {
        for (int i = 0; i < group_names.Length; ++i)
        {
            octets[i] = int.Parse(parts.Groups[group_names[i]].Value);
        }
    }
    catch (Exception)
    {
        // Any failure: store a placeholder, then flag the key
        this.addInt(0);
        this.is_valid = false;
        return;
    }

    // The pattern admits up to 999 per group (and no sign), so only the
    // upper bound needs checking
    foreach (int octet in octets)
    {
        if (octet > 255)
        {
            this.addInt(0);
            this.is_valid = false;
            return;
        }
    }

    // Fold the octets into one integer, first octet ending up highest
    int packed = 0;
    foreach (int octet in octets)
    {
        packed = (packed << 8) | octet;
    }

    this.addInt(packed);
}
760 
761 
/// <summary>
/// Appends a time given as "H[H]:MM:SS[.mmm]" to the key as a bit-packed
/// 32-bit integer.
/// </summary>
/// <remarks>
/// A null string is encoded as zero and leaves the key valid.  Text that
/// does not match the pattern, or has hour above 23, minute above 59 or
/// second above 59, is encoded as zero and marks the key invalid.
/// A fraction of one or two digits is scaled up to milliseconds.
/// Packed layout: hour &lt;&lt; 26 | minute &lt;&lt; 20 | second &lt;&lt; 14 |
/// millisecond &lt;&lt; 4.
/// </remarks>
/// <param name="value">The time text, or null.</param>
/// <exception cref="KineticaException">Thrown when the buffer is full or cannot take four more bytes.</exception>
public void addTime(string value)
{
    // Throws right away if no room is left at all
    this.isBufferFull(true);

    if (value == null)
    {
        this.addInt(0);
        return;
    }

    Match parts = TIME_REGEX.Match(value);
    if (!parts.Success)
    {
        // Wrong shape: flag the key, then store a placeholder
        this.is_valid = false;
        this.addInt(0);
        return;
    }

    uint hh, mi, ss;
    uint millis = 0; // the fraction is optional

    try
    {
        hh = uint.Parse(parts.Groups["hour"].Value);
        mi = uint.Parse(parts.Groups["minute"].Value);
        ss = uint.Parse(parts.Groups["seconds"].Value);

        Group fraction = parts.Groups["milliseconds"];
        if (fraction.Success)
        {
            millis = uint.Parse(fraction.Value);

            // ".5" means 500 ms and ".25" means 250 ms
            for (int digits = fraction.Value.Length; digits < 3; ++digits)
            {
                millis *= 10;
            }
        }
    }
    catch (Exception)
    {
        // Any failure: store a placeholder, then flag the key
        this.addInt(0);
        this.is_valid = false;
        return;
    }

    bool in_range = (hh <= 23) && (mi <= 59) && (ss <= 59);
    if (!in_range)
    {
        this.addInt(0);
        this.is_valid = false;
        return;
    }

    // Assemble the packed representation field by field
    uint packed = hh << 26;
    packed |= mi << 20;
    packed |= ss << 14;
    packed |= millis << 4;

    this.addInt((int)packed);
}
838 
839 
/// <summary>
/// Appends a timestamp, given as milliseconds relative to 1970-01-01, to
/// the key as a bit-packed 64-bit integer.  A null value is encoded as
/// zero.
/// </summary>
/// <remarks>
/// Packed layout: (year - 1900) &lt;&lt; 53 | month &lt;&lt; 49 | day &lt;&lt; 44 |
/// hour &lt;&lt; 39 | minute &lt;&lt; 33 | second &lt;&lt; 27 | millisecond &lt;&lt; 17 |
/// day-of-year &lt;&lt; 8 | day-of-week &lt;&lt; 5, where day-of-week is the .NET
/// DayOfWeek value plus one.
/// Unlike the string-based encoders, nothing is caught here: a value that
/// DateTime cannot represent propagates DateTime's own exception.
/// </remarks>
/// <param name="value">Milliseconds relative to the epoch, or null.</param>
public void addTimeStamp(long? value)
{
    if (!value.HasValue)
    {
        this.addLong(0);
        return;
    }

    // Turn the millisecond offset into calendar fields
    DateTime moment = EPOCH_DATE.AddMilliseconds((double)value);
    long day_of_week = (long)moment.DayOfWeek + 1;

    // Assemble the packed representation field by field
    long packed = ((long)(moment.Year - YEAR_1900)) << 53;
    packed |= ((long)moment.Month) << 49;
    packed |= ((long)moment.Day) << 44;
    packed |= ((long)moment.Hour) << 39;
    packed |= ((long)moment.Minute) << 33;
    packed |= ((long)moment.Second) << 27;
    packed |= ((long)moment.Millisecond) << 17;
    packed |= ((long)moment.DayOfYear) << 8;
    packed |= day_of_week << 5;

    this.addLong(packed);
}
868 
869 
870 
/// <summary>
/// Computes and stores the routing hash and the 32-bit hash code from the
/// completed key buffer.
/// </summary>
/// <remarks>
/// The routing hash is the <c>val1</c> member of
/// MurMurHash3.murmurhash3_x64_128 over the whole buffer with seed 10.
/// The hash code is that value XOR-ed with the low sixteen bits of its
/// upper half, truncated to an int.
/// </remarks>
/// <exception cref="KineticaException">Thrown when the buffer has not been filled completely.</exception>
public void computHashes()
{
    // Every byte of the key must have been supplied before hashing
    bool complete = (this.current_size == this.buffer_size);
    if (!complete)
    {
        throw new KineticaException("The RecordKey buffer is not full; check that all the relevant values have been added.");
    }

    MurMurHash3.LongPair digest = new MurMurHash3.LongPair();
    MurMurHash3.murmurhash3_x64_128(this.buffer, 0, (uint)this.buffer_size, 10, out digest);

    long routing = digest.val1;
    long folded = routing ^ ((routing >> 32) & 0x0000ffffL);

    this.routingHash = routing;
    this.hash_code = (int)folded;
}
891 
892 
893 
/// <summary>
/// Picks an entry from a routing table using this key's routing hash.
/// </summary>
/// <remarks>
/// The slot is |routingHash % routingTable.Count|.  The stored entry is
/// returned minus one, because the first worker rank is element zero of
/// the worker list.  The routing hash is whatever <c>computHashes()</c>
/// last stored.
/// </remarks>
/// <param name="routingTable">Table of entries to select from; must not be empty.</param>
/// <returns>The selected table entry minus one.</returns>
public int route(IList<int> routingTable)
{
    // The remainder is smaller in magnitude than Count, so it fits an int
    int remainder = (int)(this.routingHash % routingTable.Count);
    int slot = Math.Abs(remainder);

    return routingTable[slot] - 1;
}
907 
908  } // end class RecordKey
909 
910 } // end namespace kinetica.Utils
static void murmurhash3_x64_128(byte[] key, uint offset, uint len, int seed, out LongPair output)
Returns the MurmurHash3_x64_128 hash, placing the result in output
Definition: MurMurHash3.cs:59