Kinetica   C#   API  Version 7.2.3.0
SchemaNormalization.cs
Go to the documentation of this file.
1 
19 using System.Collections.Generic;
20 using System.Text;
21 using System;
22 
23 namespace Avro
24 {
28  public static class SchemaNormalization
29  {
/// <summary>
/// 64-bit constant used both as the starting value of the running
/// fingerprint in <c>Fingerprint64</c> and as the value XOR-ed in while
/// the <c>Fp64</c> lookup table is generated.
/// </summary>
/// <remarks>
/// Declared <c>readonly</c> because the lookup table is derived from this
/// value exactly once; reassigning it afterwards would leave the table and
/// the seed out of sync and silently change every computed fingerprint.
/// </remarks>
public static readonly long Empty64 = -4513414715797952619;
31 
/// <summary>
/// Renders the supplied schema as text by delegating to <c>Build</c>,
/// starting from an empty name environment and an empty buffer.
/// </summary>
/// <param name="s">Schema to render.</param>
/// <returns>The text accumulated by <c>Build</c> for <paramref name="s"/>.</returns>
public static string ToParsingForm(Schema s)
{
    // Full names of named schemas already emitted, mapped to their quoted form.
    IDictionary<string, string> seenNames = new Dictionary<string, string>();
    StringBuilder buffer = new StringBuilder();

    // Build appends into (and returns) the very same builder it is given.
    Build(seenNames, s, buffer);
    return buffer.ToString();
}
42 
/// <summary>
/// Computes a fingerprint of <paramref name="data"/> with the algorithm
/// selected by <paramref name="fpName"/>.
/// </summary>
/// <param name="fpName">
/// Algorithm name: <c>"CRC-64-AVRO"</c>, <c>"MD5"</c> or <c>"SHA-256"</c>.
/// </param>
/// <param name="data">Bytes to fingerprint.</param>
/// <returns>
/// For <c>"CRC-64-AVRO"</c>, the 64-bit value from <c>Fingerprint64</c> as
/// 8 bytes, least-significant byte first. For the other algorithms, the
/// hash bytes returned by the corresponding <c>ComputeHash</c> call.
/// </returns>
/// <exception cref="ArgumentException">
/// <paramref name="fpName"/> is not one of the supported names.
/// </exception>
public static byte[] Fingerprint(string fpName, byte[] data)
{
    switch (fpName)
    {
        case "CRC-64-AVRO":
        {
            long fp = Fingerprint64(data);
            byte[] result = new byte[8];
            for (int i = 0; i < result.Length; i++)
            {
                // Keep only the low byte, then shift the next byte into place.
                result[i] = unchecked((byte) fp);
                fp >>= 8;
            }
            return result;
        }
        case "MD5":
            // Hash algorithm objects are IDisposable; release them deterministically.
            using (var md5 = System.Security.Cryptography.MD5.Create())
            {
                return md5.ComputeHash(data);
            }
        case "SHA-256":
            using (var sha256 = System.Security.Cryptography.SHA256.Create())
            {
                return sha256.ComputeHash(data);
            }
        default:
            throw new ArgumentException(string.Format("Unsupported fingerprint computation algorithm ({0})", fpName));
    }
}
95 
/// <summary>
/// Applies <c>Fingerprint(string, byte[])</c> to the UTF-8 bytes of the
/// text produced by <c>ToParsingForm</c> for the supplied schema.
/// </summary>
/// <param name="fpName">Fingerprint algorithm name, passed through to <c>Fingerprint</c>.</param>
/// <param name="s">Schema to fingerprint.</param>
/// <returns>The fingerprint bytes returned by <c>Fingerprint</c>.</returns>
public static byte[] ParsingFingerprint(string fpName, Schema s)
{
    string parsingForm = ToParsingForm(s);
    byte[] utf8Bytes = Encoding.UTF8.GetBytes(parsingForm);
    return Fingerprint(fpName, utf8Bytes);
}
106 
/// <summary>
/// Applies <c>Fingerprint64</c> to the UTF-8 bytes of the text produced by
/// <c>ToParsingForm</c> for the supplied schema.
/// </summary>
/// <param name="s">Schema to fingerprint.</param>
/// <returns>The 64-bit fingerprint returned by <c>Fingerprint64</c>.</returns>
public static long ParsingFingerprint64(Schema s)
{
    string parsingForm = ToParsingForm(s);
    byte[] utf8Bytes = Encoding.UTF8.GetBytes(parsingForm);
    return Fingerprint64(utf8Bytes);
}
116 
/// <summary>
/// Folds every byte of <paramref name="data"/> into a 64-bit value,
/// starting from <c>Empty64</c> and using the <c>Fp64.FpTable</c> lookup
/// table one byte at a time.
/// </summary>
/// <param name="data">Bytes to fingerprint.</param>
/// <returns>The resulting 64-bit value; <c>Empty64</c> when <paramref name="data"/> is empty.</returns>
private static long Fingerprint64(byte[] data)
{
    long crc = Empty64;
    for (int pos = 0; pos < data.Length; pos++)
    {
        // Table slot is chosen by the low 8 bits of (state XOR input byte).
        int slot = (int) (crc ^ data[pos]) & 0xff;

        // Shift through ulong so the shift is logical, not sign-extending.
        long shifted = (long) (((ulong) crc) >> 8);

        crc = shifted ^ Fp64.FpTable[slot];
    }
    return crc;
}
131 
/// <summary>
/// Recursively appends the textual form of <paramref name="s"/> to
/// <paramref name="o"/>.
/// </summary>
/// <remarks>
/// Output by schema tag:
/// unions become a bracketed, comma-separated list of their branches;
/// arrays and maps become an object with "type" plus "items" / "values";
/// enum, fixed and record schemas become an object with "name", "type" and
/// "symbols" / "size" / "fields" the first time their full name is seen,
/// and just the quoted full name on every later occurrence;
/// every other tag is written as the quoted <c>s.Name</c>.
/// </remarks>
/// <param name="env">
/// Full names of named schemas already written, mapped to their quoted
/// form. Updated in place as new named schemas are encountered.
/// </param>
/// <param name="s">Schema to append.</param>
/// <param name="o">Builder receiving the output.</param>
/// <returns>The same builder instance passed in as <paramref name="o"/>.</returns>
private static StringBuilder Build(IDictionary<string, string> env, Schema s, StringBuilder o)
{
    Schema.Type st = s.Tag;
    switch (st)
    {
        case Schema.Type.Union:
        {
            // Direct cast: a tag/type mismatch fails here with a clear
            // InvalidCastException instead of a later null dereference.
            UnionSchema unionSchema = (UnionSchema) s;
            bool firstBranch = true;
            o.Append('[');
            foreach (Schema branch in unionSchema.Schemas)
            {
                if (!firstBranch)
                {
                    o.Append(",");
                }
                firstBranch = false;
                Build(env, branch, o);
            }
            return o.Append(']');
        }

        case Schema.Type.Array:
        case Schema.Type.Map:
        {
            o.Append("{\"type\":\"").Append(Schema.GetTypeString(st)).Append("\"");
            if (st == Schema.Type.Array)
            {
                ArraySchema arraySchema = (ArraySchema) s;
                o.Append(",\"items\":");
                Build(env, arraySchema.ItemSchema, o);
            }
            else
            {
                MapSchema mapSchema = (MapSchema) s;
                o.Append(",\"values\":");
                Build(env, mapSchema.ValueSchema, o);
            }
            return o.Append("}");
        }

        case Schema.Type.Enumeration:
        case Schema.Type.Fixed:
        case Schema.Type.Record:
        {
            NamedSchema namedSchema = (NamedSchema) s;
            var name = namedSchema.Fullname;

            // Single lookup: a name seen before is emitted as a bare reference.
            string knownName;
            if (env.TryGetValue(name, out knownName))
            {
                return o.Append(knownName);
            }

            // Register the name BEFORE descending so that recursive
            // references inside this schema resolve to the quoted name.
            var qname = "\"" + name + "\"";
            env.Add(name, qname);

            o.Append("{\"name\":").Append(qname);
            o.Append(",\"type\":\"").Append(Schema.GetTypeString(st)).Append("\"");

            if (st == Schema.Type.Enumeration)
            {
                EnumSchema enumSchema = (EnumSchema) s;
                bool firstSymbol = true;
                o.Append(",\"symbols\":[");
                foreach (var enumSymbol in enumSchema.Symbols)
                {
                    if (!firstSymbol)
                    {
                        o.Append(",");
                    }
                    firstSymbol = false;
                    o.Append("\"").Append(enumSymbol).Append("\"");
                }
                o.Append("]");
            }
            else if (st == Schema.Type.Fixed)
            {
                FixedSchema fixedSchema = (FixedSchema) s;
                o.Append(",\"size\":").Append(fixedSchema.Size.ToString());
            }
            else // st == Schema.Type.Record
            {
                RecordSchema recordSchema = (RecordSchema) s;
                bool firstField = true;
                o.Append(",\"fields\":[");
                foreach (var field in recordSchema.Fields)
                {
                    if (!firstField)
                    {
                        o.Append(",");
                    }
                    firstField = false;
                    o.Append("{\"name\":\"").Append(field.Name).Append("\"");
                    o.Append(",\"type\":");
                    Build(env, field.Schema, o).Append("}");
                }
                o.Append("]");
            }
            return o.Append("}");
        }

        default: // boolean, bytes, double, float, int, long, null, string
            return o.Append("\"").Append(s.Name).Append("\"");
    }
}
231 
/// <summary>
/// Holder for the 256-entry lookup table consumed by <c>Fingerprint64</c>.
/// The table is filled once, in the static constructor.
/// </summary>
private static class Fp64
{
    private static readonly long[] fpTable = new long[256];

    /// <summary>
    /// The lookup table, indexed by a value in the range 0..255.
    /// </summary>
    public static long[] FpTable
    {
        get { return fpTable; }
    }

    static Fp64()
    {
        for (int entry = 0; entry < 256; entry++)
        {
            long value = entry;
            int bitsLeft = 8;
            while (bitsLeft > 0)
            {
                // Remember the low bit before it is shifted out.
                bool lowBitSet = (value & 1L) != 0;

                // Shift through ulong so the shift is logical, not sign-extending.
                value = (long) (((ulong) value) >> 1);
                if (lowBitSet)
                {
                    value ^= Empty64;
                }
                bitsLeft--;
            }
            fpTable[entry] = value;
        }
    }
}
255  }
256 }
static long ParsingFingerprint64(Schema s)
Returns Fingerprint64(byte[]) applied to the parsing canonical form of the supplied schema.
Base class for all schema types
Definition: Schema.cs:29
Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.
static string ToParsingForm(Schema s)
Parses a schema into the canonical form as defined by Avro spec.
Collection of static methods for generating the canonical form of schemas.
static byte [] ParsingFingerprint(string fpName, Schema s)
Returns Fingerprint(string, byte[]) applied to the parsing canonical form of the supplied schema.
static byte [] Fingerprint(string fpName, byte[] data)