Loading...
Searching...
No Matches
json.h
Go to the documentation of this file.
1// BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE
2
3#ifndef AWKWARD_IO_JSON_H_
4#define AWKWARD_IO_JSON_H_
5
6#include <complex>
7#include <cstdio>
8#include <string>
9
10#include "awkward/common.h"
15#include "awkward/util.h"
16
17namespace awkward {
23 public:
24 virtual int64_t read(int64_t num_bytes, char* buffer) = 0;
25 };
26
43 EXPORT_SYMBOL void
45 ArrayBuilder& builder,
46 int64_t buffersize,
47 bool read_one,
48 const char* nan_string = nullptr,
49 const char* posinf_string = nullptr,
50 const char* neginf_string = nullptr);
51
53 public:
55 int64_t buffersize,
56 bool read_one,
57 const char* nan_string,
58 const char* posinf_string,
59 const char* neginf_string,
60 const char* jsonassembly,
61 int64_t initial,
62 double resize); // Delete copy constructor
64
65 // Delete copy-assignment operator
67
69 inline int64_t current_stack_depth() const noexcept {
70 return current_stack_depth_;
71 }
72
74 inline int64_t current_instruction() const noexcept {
75 return current_instruction_;
76 }
77
79 inline int64_t instruction() const noexcept {
80 return instructions_.data()[static_cast<size_t>(current_instruction_) * 4];
81 }
82
84 inline int64_t argument1() const noexcept {
85 return instructions_.data()[static_cast<size_t>(current_instruction_) * 4 + 1];
86 }
87
89 inline int64_t argument2() const noexcept {
90 return instructions_.data()[static_cast<size_t>(current_instruction_) * 4 + 2];
91 }
92
94 inline int64_t argument3() const noexcept {
95 return instructions_.data()[static_cast<size_t>(current_instruction_) * 4 + 3];
96 }
97
99 inline void step_forward() noexcept {
100 current_instruction_++;
101 }
102
104 inline void step_backward() noexcept {
105 current_instruction_--;
106 }
107
109 inline void push_stack(int64_t jump_to) noexcept {
110 instruction_stack_.data()[static_cast<size_t>(current_stack_depth_)] = current_instruction_;
111 current_stack_depth_++;
112 current_instruction_ = jump_to;
113 }
114
116 inline void pop_stack() noexcept {
117 current_stack_depth_--;
118 current_instruction_ = instruction_stack_.data()[static_cast<size_t>(current_stack_depth_)];
119 }
120
121 inline int64_t find_enum(const char* str) noexcept {
122 int64_t* offsets = string_offsets_.data();
123 char* chars = characters_.data();
124 int64_t stringsstart = argument2();
125 int64_t start;
126 int64_t stop;
127 for (int64_t i = stringsstart; i < argument3(); i++) {
128 start = offsets[i];
129 stop = offsets[i + 1];
130 if (strncmp(str, &chars[start], static_cast<size_t>(stop - start)) == 0) {
131 return i - stringsstart;
132 }
133 }
134 return -1;
135 }
136
138 inline int64_t find_key(const char* str) noexcept {
139 int64_t* offsets = string_offsets_.data();
140 char* chars = characters_.data();
141 int64_t i = 0;
142 int64_t j = 0;
143 int64_t stringi;
144 int64_t start;
145 int64_t stop;
146 uint64_t chunkmask;
147 // optimistic: fields in data are in the order specified by the schema
148 if (argument1() != 0) {
149 // increment the current (last seen) field with wrap-around
150 record_current_field_[static_cast<size_t>(argument2())]++;
151 if (record_current_field_[static_cast<size_t>(argument2())] == argument1()) {
152 record_current_field_[static_cast<size_t>(argument2())] = 0;
153 }
154 j = record_current_field_[static_cast<size_t>(argument2())];
155 // use the record_current_field_ (as j)
156 i = current_instruction_ + 1 + j;
157 stringi = instructions_.data()[static_cast<size_t>(i) * 4 + 1];
158 start = offsets[stringi];
159 stop = offsets[stringi + 1];
160 if (strncmp(str, &chars[start], static_cast<size_t>(stop - start)) == 0) {
161 // ensure that the checklist bit is 1
162 chunkmask = static_cast<uint64_t>(1) << (j & 0x3f);
163 if ((record_checklist_[static_cast<size_t>(argument2())][static_cast<size_t>(j >> 6)] & chunkmask) == 0) {
164 return -1; // ignore the value of a duplicate key
165 }
166 // set the checklist bit to 0
167 record_checklist_[static_cast<size_t>(argument2())][static_cast<size_t>(j >> 6)] &= ~chunkmask;
168 return key_instruction_at(i);
169 }
170 }
171 // pessimistic: try all field names, starting from the first
172 for (i = current_instruction_ + 1; i <= current_instruction_ + argument1(); i++) {
173 // not including the one optimistic trial
174 if (i != current_instruction_ + 1 + record_current_field_[static_cast<size_t>(argument2())]) {
175 stringi = instructions_.data()[static_cast<size_t>(i) * 4 + 1];
176 start = offsets[stringi];
177 stop = offsets[stringi + 1];
178 if (strncmp(str, &chars[start], static_cast<size_t>(stop - start)) == 0) {
179 // set the record_current_field_
180 j = i - (current_instruction_ + 1);
181 record_current_field_[static_cast<size_t>(argument2())] = j;
182 // ensure that the checklist bit is 1
183 chunkmask = static_cast<uint64_t>(1) << (j & 0x3f);
184 if ((record_checklist_[static_cast<size_t>(argument2())][static_cast<size_t>(j >> 6)] & chunkmask) == 0) {
185 return -1; // ignore the value of a duplicate key
186 }
187 // set the checklist bit to 0
188 record_checklist_[static_cast<size_t>(argument2())][static_cast<size_t>(j >> 6)] &= ~chunkmask;
189 return key_instruction_at(i);
190 }
191 }
192 }
193 return -1;
194 }
195
197 inline bool key_already_filled(int64_t record_identifier, int64_t j) const noexcept {
198 uint64_t chunkmask = static_cast<uint64_t>(1) << (j & 0x3f);
199 return (record_checklist_[static_cast<size_t>(record_identifier)][static_cast<size_t>(j >> 6)] & chunkmask) == 0;
200 }
201
203 inline int64_t key_instruction_at(int64_t i) const noexcept {
204 return instructions_.data()[static_cast<size_t>(i) * 4 + 2];
205 }
206
208 inline void start_object(int64_t keytableheader_instruction) noexcept {
209 int64_t record_identifier = instructions_.data()[static_cast<size_t>(keytableheader_instruction) * 4 + 2];
210 record_checklist_[static_cast<size_t>(record_identifier)].assign(
211 record_checklist_init_[static_cast<size_t>(record_identifier)].begin(),
212 record_checklist_init_[static_cast<size_t>(record_identifier)].end()
213 );
214 }
215
217 inline bool end_object(int64_t keytableheader_instruction) const noexcept {
218 int64_t record_identifier = instructions_.data()[static_cast<size_t>(keytableheader_instruction) * 4 + 2];
219 uint64_t should_be_zero = 0;
220 for (uint64_t chunk : record_checklist_[static_cast<size_t>(record_identifier)]) {
221 should_be_zero |= chunk;
222 }
223 return should_be_zero == 0;
224 }
225
227 inline void write_int8(int64_t index, int8_t x) noexcept {
228 buffers_uint8_[static_cast<size_t>(index)].append(*reinterpret_cast<uint8_t*>(&x));
229 }
230
232 inline void write_uint8(int64_t index, uint8_t x) noexcept {
233 buffers_uint8_[static_cast<size_t>(index)].append(x);
234 }
235
237 inline void write_many_uint8(int64_t index, int64_t num_items, const uint8_t* values) noexcept {
238 buffers_uint8_[static_cast<size_t>(index)].extend(values, static_cast<size_t>(num_items));
239 }
240
242 inline void write_int64(int64_t index, int64_t x) noexcept {
243 buffers_int64_[static_cast<size_t>(index)].append(x);
244 }
245
247 inline void write_uint64(int64_t index, uint64_t x) noexcept {
248 buffers_int64_[static_cast<size_t>(index)].append(static_cast<int64_t>(x));
249 }
250
252 inline void write_add_int64(int64_t index, int64_t x) noexcept {
253 buffers_int64_[static_cast<size_t>(index)].append(buffers_int64_[static_cast<size_t>(index)].last() + x);
254 }
255
257 inline void write_float64(int64_t index, double x) noexcept {
258 buffers_float64_[static_cast<size_t>(index)].append(x);
259 }
260
262 inline int64_t get_and_increment(int64_t index) noexcept {
263 return counters_[static_cast<size_t>(index)]++;
264 }
265
267 int64_t length() const noexcept {
268 return length_;
269 }
270
272 inline void add_to_length(int64_t length) noexcept {
273 length_ += length;
274 }
275
277 std::string debug() const noexcept;
278
280 int64_t num_outputs() const {
281 return static_cast<int64_t>(output_names_.size());
282 }
283
285 std::string output_name(int64_t i) const {
286 return output_names_[static_cast<size_t>(i)];
287 }
288
290 std::string output_dtype(int64_t i) const {
291 switch (output_dtypes_[static_cast<size_t>(i)]) {
292 case util::dtype::int8:
293 return "int8";
294 case util::dtype::uint8:
295 return "uint8";
296 case util::dtype::int64:
297 return "int64";
298 case util::dtype::float64:
299 return "float64";
300 default:
301 return "unknown";
302 }
303 }
304
306 int64_t output_num_items(int64_t i) const {
307 switch (output_dtypes_[static_cast<size_t>(i)]) {
308 case util::dtype::int8:
309 return static_cast<int64_t>(buffers_uint8_[static_cast<size_t>(output_which_[static_cast<size_t>(i)])].nbytes());
310 case util::dtype::uint8:
311 return static_cast<int64_t>(buffers_uint8_[static_cast<size_t>(output_which_[static_cast<size_t>(i)])].nbytes());
312 case util::dtype::int64:
313 return static_cast<int64_t>(buffers_int64_[static_cast<size_t>(output_which_[static_cast<size_t>(i)])].nbytes() / 8);
314 case util::dtype::float64:
315 return static_cast<int64_t>(buffers_float64_[static_cast<size_t>(output_which_[static_cast<size_t>(i)])].nbytes() / 8);
316 default:
317 return -1;
318 }
319 }
320
322 void output_fill(int64_t i, void* external_pointer) const {
323 switch (output_dtypes_[static_cast<size_t>(i)]) {
324 case util::dtype::int8:
325 buffers_uint8_[static_cast<size_t>(output_which_[static_cast<size_t>(i)])].concatenate(
326 reinterpret_cast<uint8_t*>(external_pointer)
327 );
328 break;
329 case util::dtype::uint8:
330 buffers_uint8_[static_cast<size_t>(output_which_[static_cast<size_t>(i)])].concatenate(
331 reinterpret_cast<uint8_t*>(external_pointer)
332 );
333 break;
334 case util::dtype::int64:
335 buffers_int64_[static_cast<size_t>(output_which_[static_cast<size_t>(i)])].concatenate(
336 reinterpret_cast<int64_t*>(external_pointer)
337 );
338 break;
339 case util::dtype::float64:
340 buffers_float64_[static_cast<size_t>(output_which_[static_cast<size_t>(i)])].concatenate(
341 reinterpret_cast<double*>(external_pointer)
342 );
343 break;
344 default:
345 break;
346 }
347 }
348
349 private:
350 std::vector<int64_t> instructions_;
351 std::vector<char> characters_;
352 std::vector<int64_t> string_offsets_;
353
354 std::vector<int64_t> record_current_field_;
355 std::vector<std::vector<uint64_t>> record_checklist_init_;
356 std::vector<std::vector<uint64_t>> record_checklist_;
357
358 std::vector<std::string> output_names_;
359 std::vector<util::dtype> output_dtypes_;
360 std::vector<int64_t> output_which_;
361 std::vector<GrowableBuffer<uint8_t>> buffers_uint8_;
362 std::vector<GrowableBuffer<int64_t>> buffers_int64_;
363 std::vector<GrowableBuffer<double>> buffers_float64_;
364
365 int64_t current_instruction_;
366 std::vector<int64_t> instruction_stack_;
367 int64_t current_stack_depth_;
368 std::vector<int64_t> counters_;
369
370 int64_t length_;
371 };
372
373}
374
375#endif // AWKWARD_IO_JSON_H_
User interface to the Builder system: the ArrayBuilder is a fixed reference while the Builder subclas...
Definition ArrayBuilder.h:23
Abstract class to represent a file-like object, something with a read(num_bytes) method....
Definition json.h:22
virtual int64_t read(int64_t num_bytes, char *buffer)=0
void write_many_uint8(int64_t index, int64_t num_items, const uint8_t *values) noexcept
HERE.
Definition json.h:237
FromJsonObjectSchema(const FromJsonObjectSchema &)=delete
FromJsonObjectSchema & operator=(FromJsonObjectSchema &)=delete
void step_forward() noexcept
HERE.
Definition json.h:99
void pop_stack() noexcept
HERE.
Definition json.h:116
void start_object(int64_t keytableheader_instruction) noexcept
HERE.
Definition json.h:208
FromJsonObjectSchema(FileLikeObject *source, int64_t buffersize, bool read_one, const char *nan_string, const char *posinf_string, const char *neginf_string, const char *jsonassembly, int64_t initial, double resize)
void write_uint64(int64_t index, uint64_t x) noexcept
HERE.
Definition json.h:247
void output_fill(int64_t i, void *external_pointer) const
HERE.
Definition json.h:322
int64_t instruction() const noexcept
HERE.
Definition json.h:79
int64_t length() const noexcept
HERE.
Definition json.h:267
void write_float64(int64_t index, double x) noexcept
HERE.
Definition json.h:257
int64_t find_enum(const char *str) noexcept
HERE.
Definition json.h:121
void write_int64(int64_t index, int64_t x) noexcept
HERE.
Definition json.h:242
int64_t current_instruction() const noexcept
HERE.
Definition json.h:74
int64_t argument1() const noexcept
HERE.
Definition json.h:84
int64_t num_outputs() const
HERE.
Definition json.h:280
bool end_object(int64_t keytableheader_instruction) const noexcept
HERE.
Definition json.h:217
void step_backward() noexcept
HERE.
Definition json.h:104
void write_uint8(int64_t index, uint8_t x) noexcept
HERE.
Definition json.h:232
int64_t get_and_increment(int64_t index) noexcept
HERE.
Definition json.h:262
void write_add_int64(int64_t index, int64_t x) noexcept
HERE.
Definition json.h:252
int64_t argument2() const noexcept
HERE.
Definition json.h:89
void add_to_length(int64_t length) noexcept
HERE.
Definition json.h:272
std::string output_name(int64_t i) const
HERE.
Definition json.h:285
void write_int8(int64_t index, int8_t x) noexcept
HERE.
Definition json.h:227
void push_stack(int64_t jump_to) noexcept
HERE.
Definition json.h:109
int64_t find_key(const char *str) noexcept
HERE.
Definition json.h:138
int64_t argument3() const noexcept
HERE.
Definition json.h:94
std::string debug() const noexcept
HERE.
int64_t output_num_items(int64_t i) const
HERE.
Definition json.h:306
std::string output_dtype(int64_t i) const
HERE.
Definition json.h:290
bool key_already_filled(int64_t record_identifier, int64_t j) const noexcept
HERE.
Definition json.h:197
int64_t key_instruction_at(int64_t i) const noexcept
HERE.
Definition json.h:203
int64_t current_stack_depth() const noexcept
HERE.
Definition json.h:69
#define EXPORT_SYMBOL
Definition common.h:14
Definition ArrayBuilder.h:14
EXPORT_SYMBOL void fromjsonobject(FileLikeObject *source, ArrayBuilder &builder, int64_t buffersize, bool read_one, const char *nan_string=nullptr, const char *posinf_string=nullptr, const char *neginf_string=nullptr)
Parses a JSON-encoded file-like object using an ArrayBuilder.