GPUdb C++ API  Version 6.2.0.3
GPUdbIngestor.hpp
Go to the documentation of this file.
1 #ifndef __GPUDB_INGESTOR_HPP__
2 #define __GPUDB_INGESTOR_HPP__
3 
4 #include "gpudb/GPUdb.hpp"
5 #include "gpudb/Http.hpp"
6 #include "gpudb/Type.hpp"
8 
9 
10 #include <atomic>
11 #include <map>
12 #include <string>
13 #include <vector>
14 
15 #include <boost/noncopyable.hpp>
16 #include <boost/shared_ptr.hpp>
17 
18 
19 namespace gpudb
20 {
21 
22 // Forward declaration
23 class GPUdb;
24 
25 
26 /*
27  * The multi-head ingestor class (also handles regular insertion). Using this class is
28  * significantly more computation-intensive compared to a regular insertion. So, it is
29  * highly recommended to use this ingestor only if multi-head ingestion is actually turned
30  * on in the server and there is a large volume of records to be inserted.
31  */
    // Non-copyable (privately inherits boost::noncopyable) ingestor that queues
    // records for a single table and inserts them into GPUdb.
32 class GPUdbIngestor : private boost::noncopyable
33 {
34 
35 public:
36 
    // Overload taking both an explicit worker list and insertion options.
    // NOTE(review): batch_size is presumably the number of queued records
    // that triggers an insertion -- confirm against the implementation.
37  GPUdbIngestor( const gpudb::GPUdb& db, const gpudb::Type& record_type,
38  const std::string& table_name,
39  const WorkerList& worker_list,
40  const std::map<std::string, std::string>& insert_options,
41  size_t batch_size );
42 
    // Overload taking an explicit worker list but no insertion options.
43  GPUdbIngestor( const gpudb::GPUdb& db, const gpudb::Type& record_type,
44  const std::string& table_name,
45  const WorkerList& worker_list,
46  size_t batch_size );
47 
48 
    // Overload taking insertion options but no worker list.
49  GPUdbIngestor( const gpudb::GPUdb& db, const gpudb::Type& record_type,
50  const std::string& table_name,
51  const std::map<std::string, std::string>& insert_options,
52  size_t batch_size );
53 
    // Overload taking neither a worker list nor insertion options.
54  GPUdbIngestor( const gpudb::GPUdb& db, const gpudb::Type& record_type,
55  const std::string& table_name, size_t batch_size );
56 
58 
    // Returns the name of the table on which this class operates.
62  const std::string& getTableName() const { return m_table_name; }
63 
    // Returns the GPUdb client handle that this class uses internally.
67  const gpudb::GPUdb& getGPUdb() const { return m_db; }
68 
    // Returns the insertion options the ingestor uses.
72  const std::map<std::string, std::string>& getOptions() const { return m_insert_options; }
73 
    // Returns the count of records inserted so far through this ingestor
    // instance (reads the atomic counter m_count_inserted).
78  size_t getCountInserted() const { return m_count_inserted; }
79 
    // Returns the count of records updated so far through this ingestor
    // instance (reads the atomic counter m_count_updated).
84  size_t getCountUpdated() const { return m_count_updated; }
85 
86 
    // Ensures that all queued records are inserted into the database.
95  void flush();
96 
97 
     // Queues a record for insertion into GPUdb. The record is taken by value.
108  void insert( gpudb::GenericRecord record );
109 
110 
     // Overload accepting a batch of records, taken by value.
     // NOTE(review): presumably queues each record like the single-record
     // overload -- confirm against the implementation.
121  void insert( std::vector<gpudb::GenericRecord> records );
122 
123 
124 private:
125 
     // Shorthand aliases used by the private members below.
126  typedef std::map<std::string, std::string> str_to_str_map_t;
127  typedef boost::shared_ptr<gpudb::WorkerQueue> worker_queue_ptr_t;
128 
129 
     // Default constructor is declared private, so client code cannot
     // default-construct an ingestor.
130  GPUdbIngestor();
131 
     // NOTE(review): looks like the initialization helper shared by the
     // public constructors; it takes no insert_options parameter -- confirm
     // how the options are set in the implementation.
132  void construct( const gpudb::GPUdb& db,
133  const gpudb::Type& record_type,
134  const std::string& table_name,
135  const WorkerList& worker_list,
136  size_t batch_size );
137 
138  /*
139  * Insert the given list of records to the database residing at the given URL.
140  * Upon any error, throws an InsertException with the queue of records passed into it.
141  */
142  void flush( const std::vector<gpudb::GenericRecord>& queue,
143  const gpudb::HttpUrl& url );
144 
     // Client handle, held by reference: the GPUdb object it refers to must
     // outlive this ingestor.
145  const gpudb::GPUdb& m_db;
     // Table name returned by getTableName().
146  std::string m_table_name;
147  size_t m_batch_size;
     // Atomic counters returned by getCountInserted() / getCountUpdated().
148  std::atomic<size_t> m_count_inserted;
149  std::atomic<size_t> m_count_updated;
     // Insertion options returned by getOptions().
150  str_to_str_map_t m_insert_options;
151  gpudb::Type m_record_type;
     // NOTE(review): raw pointers; this listing does not show how they are
     // allocated or released -- confirm ownership in the implementation.
152  gpudb::RecordKeyBuilder* m_primary_key_builder_ptr;
153  gpudb::RecordKeyBuilder* m_shard_key_builder_ptr;
154 // record_key_buildter_ptr m_primary_key_builder_ptr;
155 // record_key_buildter_ptr m_shard_key_builder_ptr;
156  std::vector<int32_t> m_routing_table;
     // Shared pointers to gpudb::WorkerQueue objects.
     // NOTE(review): presumably one queue per worker -- confirm.
157  std::vector<worker_queue_ptr_t> m_worker_queues;
158 
159 }; // end class GPUdbIngestor
160 
161 
162 
163 } // namespace gpudb
164 
165 
166 
167 #endif // __GPUDB_INGESTOR_HPP__
168 
169 
void insert(gpudb::GenericRecord record)
Queues a record for insertion into GPUdb.
const std::string & getTableName() const
Returns the name of the table on which this class operates.
const gpudb::GPUdb & getGPUdb() const
Returns the GPUdb client handle that this class uses internally.
size_t getCountUpdated() const
Returns the count of records updated so far through this ingestor instance; this is an atomic operation.
const std::map< std::string, std::string > & getOptions() const
Returns the insertion options the ingestor uses.
size_t getCountInserted() const
Returns the count of records inserted so far through this ingestor instance; this is an atomic operation.
void flush()
Ensures that all queued records are inserted into the database.