Alexandria  2.25.0
SDC-CH common library for the Euclid project
NpyCommon.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2012-2021 Euclid Science Ground Segment
3  *
4  * This library is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU Lesser General Public License as published by the Free
6  * Software Foundation; either version 3.0 of the License, or (at your option)
7  * any later version.
8  *
9  * This library is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU Lesser General Public License
15  * along with this library; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
21 
22 namespace Euclid {
23 namespace NdArray {
24 
/**
 * Split a single NPY type descriptor (e.g. "<f8") into its byte-order flag and type code.
 *
 * @param descr
 *  Descriptor string: one byte-order character followed by the type code
 * @param big_endian
 *  Set to true only when the byte-order character is '>'; any other character yields false
 * @param dtype
 *  Set to everything after the first character of the descriptor
 * @throw std::invalid_argument
 *  If the descriptor is empty
 */
void parseSingleValue(const std::string& descr, bool& big_endian, std::string& dtype) {
  // front() on an empty string is undefined behaviour, so reject it explicitly
  if (descr.empty()) {
    throw std::invalid_argument("Empty type descriptor");
  }
  big_endian = (descr.front() == '>');
  dtype      = descr.substr(1);
}
29 
30 inline void parseFieldValues(const std::string& descr, bool& big_endian, std::vector<std::string>& attrs,
31  std::string& dtype) {
32  static const regex::regex field_expr("\\('([^']*)',\\s*'([^']*)'\\)");
33 
35  auto start = descr.begin();
36  auto end = descr.end();
37 
38  while (regex::regex_search(start, end, match, field_expr)) {
39  bool endian_aux;
40  std::string dtype_aux;
41 
42  parseSingleValue(match[2].str(), endian_aux, dtype_aux);
43  if (dtype.empty()) {
44  dtype = dtype_aux;
45  big_endian = endian_aux;
46  } else if (dtype != dtype_aux || big_endian != endian_aux) {
47  throw std::invalid_argument("NdArray only supports uniform types");
48  }
49  attrs.emplace_back(match[1].str());
50 
51  start = match[0].second;
52  }
53 }
54 
55 inline void parseNpyDict(const std::string& header, bool& fortran_order, bool& big_endian, std::string& dtype,
56  std::vector<size_t>& shape, std::vector<std::string>& attrs, size_t& n_elements) {
57  auto loc = header.find("fortran_order") + 16;
58  fortran_order = (header.substr(loc, 4) == "True");
59 
60  loc = header.find("descr") + 8;
61 
62  if (header[loc] == '\'') {
63  auto end = header.find('\'', loc + 1);
64  parseSingleValue(header.substr(loc + 1, end - loc - 1), big_endian, dtype);
65  } else if (header[loc] == '[') {
66  auto end = header.find(']', loc + 1);
67  parseFieldValues(header.substr(loc + 1, end - loc - 1), big_endian, attrs, dtype);
68  } else {
69  throw Elements::Exception() << "Failed to parse the array description: " << header;
70  }
71 
72  loc = header.find("shape") + 9;
73  auto loc2 = header.find(')', loc);
74  auto shape_str = header.substr(loc, loc2 - loc);
75  if (shape_str.back() == ',')
76  shape_str.resize(shape_str.size() - 1);
77  shape = stringToVector<size_t>(shape_str);
78  n_elements = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>());
79 }
80 
82  size_t& n_elements) {
83  // Magic
84  char magic[6];
85  input.read(magic, sizeof(magic));
86  if (std::memcmp(magic, NPY_MAGIC, sizeof(NPY_MAGIC)) != 0) {
87  throw Elements::Exception() << "Unexpected magic sequence";
88  }
89 
90  // Version and header len
91  little_uint32_t header_len;
92  little_uint16_t version;
93  input.read(reinterpret_cast<char*>(&version), sizeof(version));
94  if (version > 30) {
95  throw Elements::Exception() << "Only numpy arrays with version 3 or less are supported";
96  } else if (version.data()[0] == 1) {
97  // 16 bits integer in little endian
98  little_uint16_t aux;
99  input.read(reinterpret_cast<char*>(&aux), sizeof(aux));
100  header_len = aux;
101  } else {
102  // 32 bits integer in little endian
103  input.read(reinterpret_cast<char*>(&header_len), sizeof(header_len));
104  }
105 
106  // Read header
107  std::string header(header_len, '\0');
108  input.read(&header[0], header_len);
109 
110  // Parse header
111  bool fortran_order, big_endian;
112  parseNpyDict(header, fortran_order, big_endian, dtype, shape, attrs, n_elements);
113 
114  if (fortran_order)
115  throw Elements::Exception() << "Fortran order not supported";
116 
117  if ((big_endian && (BYTE_ORDER != BIG_ENDIAN)) || (!big_endian && (BYTE_ORDER != LITTLE_ENDIAN)))
118  throw Elements::Exception() << "Only native endianness supported for reading";
119 }
120 
121 } // namespace NdArray
122 } // namespace Euclid
T accumulate(T... args)
T begin(T... args)
T emplace_back(T... args)
T empty(T... args)
T end(T... args)
T find(T... args)
T front(T... args)
T memcmp(T... args)
void parseSingleValue(const std::string &descr, bool &big_endian, std::string &dtype)
Definition: NpyCommon.cpp:25
void readNpyHeader(std::istream &input, std::string &dtype, std::vector< size_t > &shape, std::vector< std::string > &attrs, size_t &n_elements)
Definition: NpyCommon.cpp:81
void parseFieldValues(const std::string &descr, bool &big_endian, std::vector< std::string > &attrs, std::string &dtype)
Definition: NpyCommon.cpp:30
void parseNpyDict(const std::string &header, bool &fortran_order, bool &big_endian, std::string &dtype, std::vector< size_t > &shape, std::vector< std::string > &attrs, size_t &n_elements)
Definition: NpyCommon.cpp:55
constexpr const char NPY_MAGIC[]
Definition: NpyCommon.h:37
T read(T... args)
T substr(T... args)