GPUdb C++ API  Version 7.2.2.4
GPUdbIngestor.hpp
Go to the documentation of this file.
1 #ifndef __GPUDB_INGESTOR_HPP__
2 #define __GPUDB_INGESTOR_HPP__
3 
4 #include "gpudb/GPUdb.hpp"
5 #include "gpudb/Http.hpp"
6 #include "gpudb/Type.hpp"
8 
9 
10 #include <atomic>
11 #include <map>
12 #include <string>
13 #include <vector>
14 #include <mutex>
15 
16 #include <boost/noncopyable.hpp>
17 #include <boost/shared_ptr.hpp>
18 
19 
20 namespace gpudb
21 {
22 
23 // Forward declaration
24 class GPUdb;
25 
26 
27 /*
28  * The multi-head ingestor class (also handles regular insertion). Using this class is
29  * significantly more computation-intensive compared to a regular insertion. So, it is
30  * highly recommended to use this ingestor only if multi-head ingestion is actually turned
31  * on in the server and there is a large volume of records to be inserted.
32  */
33 class GPUdbIngestor : private boost::noncopyable
34 {
35 
36 public:
37  // Constructor overload taking both an explicit worker list and insert options.
38  GPUdbIngestor( const gpudb::GPUdb& db, const gpudb::Type& record_type,
39  const std::string& table_name,
40  const WorkerList& worker_list,
41  const std::map<std::string, std::string>& insert_options,
42  size_t batch_size );
43  // Constructor overload taking an explicit worker list but no insert options.
44  GPUdbIngestor( const gpudb::GPUdb& db, const gpudb::Type& record_type,
45  const std::string& table_name,
46  const WorkerList& worker_list,
47  size_t batch_size );
48 
49  // Constructor overload taking insert options but no worker list.
50  GPUdbIngestor( const gpudb::GPUdb& db, const gpudb::Type& record_type,
51  const std::string& table_name,
52  const std::map<std::string, std::string>& insert_options,
53  size_t batch_size );
54  // Constructor overload taking neither a worker list nor insert options.
55  GPUdbIngestor( const gpudb::GPUdb& db, const gpudb::Type& record_type,
56  const std::string& table_name, size_t batch_size );
57 
59  // Returns the name of the table on which this class operates.
63  const std::string& getTableName() const { return m_table_name; }
64  // Returns the GPUdb client handle that this class uses internally.
68  const gpudb::GPUdb& getGPUdb() const { return m_db; }
69  // Returns the insertion options the ingestor uses.
73  const std::map<std::string, std::string>& getOptions() const { return m_insert_options; }
74  // Returns the count of records inserted so far through this ingestor instance (read from a std::atomic counter).
79  size_t getCountInserted() const { return m_count_inserted; }
80  // Returns the count of records updated so far through this ingestor instance (read from a std::atomic counter).
85  size_t getCountUpdated() const { return m_count_updated; }
86  // Returns the list of errors received since the last call to getErrors(), and clears the list.
91  std::vector<GPUdbInsertionException> getErrors();
92  // Returns the list of warnings received since the last call to getWarnings(), and clears the list.
97  std::vector<GPUdbInsertionException> getWarnings();
98  // Ensures that all queued records are inserted into the database.
107  void flush();
108 
109  // Queues a record for insertion into GPUdb.
120  void insert( gpudb::GenericRecord record );
121 
122  // Overload taking several records; NOTE(review): presumably queues each one like the single-record insert() -- confirm against the definition.
133  void insert( std::vector<gpudb::GenericRecord> records );
134 
135 
136 private:
137  // Shorthand aliases used by the member declarations below.
138  typedef std::map<std::string, std::string> str_to_str_map_t;
139  typedef boost::shared_ptr<gpudb::WorkerQueue> worker_queue_ptr_t;
140 
141  // The default constructor is declared private, so users of the class cannot call it.
142  GPUdbIngestor();
143  // NOTE(review): presumably the initialization helper shared by the public constructors (definition not shown here) -- confirm.
144  void construct( const gpudb::GPUdb& db,
145  const gpudb::Type& record_type,
146  const std::string& table_name,
147  const WorkerList& worker_list,
148  size_t batch_size );
149 
150  /*
151  * Insert the given list of records to the database residing at the given URL.
152  * Upon any error, throws an InsertException with the queue of records passed into it.
153  */
154  void flush( const std::vector<gpudb::GenericRecord>& queue,
155  const gpudb::HttpUrl& url );
156  // Instance state.
157  const gpudb::GPUdb& m_db; // Reference member (no copy is stored); returned by getGPUdb()
158  std::string m_table_name; // Returned by getTableName()
159  size_t m_batch_size; // NOTE(review): presumably the queued-record count that triggers a flush -- confirm
160  bool m_return_individual_errors;
161  bool m_simulate_error_mode; // Simulate returnIndividualErrors after an error
162  std::atomic<size_t> m_count_inserted; // Returned by getCountInserted()
163  std::atomic<size_t> m_count_updated; // Returned by getCountUpdated()
164  str_to_str_map_t m_insert_options; // Returned by getOptions()
165  gpudb::Type m_record_type;
166  gpudb::RecordKeyBuilder* m_primary_key_builder_ptr; // NOTE(review): raw pointer; ownership/cleanup is not visible in this header -- confirm
167  gpudb::RecordKeyBuilder* m_shard_key_builder_ptr; // NOTE(review): raw pointer; ownership/cleanup is not visible in this header -- confirm
168 // record_key_buildter_ptr m_primary_key_builder_ptr;
169 // record_key_buildter_ptr m_shard_key_builder_ptr;
170  std::vector<int32_t> m_routing_table;
171  std::vector<worker_queue_ptr_t> m_worker_queues;
172  std::vector<GPUdbInsertionException> m_error_list; // NOTE(review): presumably the list getErrors() returns and clears -- confirm
173  std::vector<GPUdbInsertionException> m_warning_list; // NOTE(review): presumably the list getWarnings() returns and clears -- confirm
174  std::mutex m_error_list_lock; // NOTE(review): by its name, presumably guards m_error_list (and possibly m_warning_list) -- confirm
175 
176 }; // end class GPUdbIngestor
177 
178 
179 
180 } // namespace gpudb
181 
182 
183 
184 #endif // __GPUDB_INGESTOR_HPP__
185 
186 
void insert(gpudb::GenericRecord record)
Queues a record for insertion into GPUdb.
std::vector< GPUdbInsertionException > getWarnings()
Returns the list of warnings received since the last call to getWarnings(), and clears the list.
const std::string & getTableName() const
Returns the name of the table on which this class operates.
const gpudb::GPUdb & getGPUdb() const
Returns the GPUdb client handle that this class uses internally.
size_t getCountUpdated() const
Returns the count of records updated so far through this ingestor instance; an atomic operation.
const std::map< std::string, std::string > & getOptions() const
Returns the insertion options the ingestor uses.
size_t getCountInserted() const
Returns the count of records inserted so far through this ingestor instance; an atomic operation.
void flush()
Ensures that all queued records are inserted into the database.
std::vector< GPUdbInsertionException > getErrors()
Returns the list of errors received since the last call to getErrors(), and clears the list.