PLaSK library
Loading...
Searching...
No Matches
reader.hpp
Go to the documentation of this file.
1/*
2 * This file is part of PLaSK (https://plask.app) by Photonics Group at TUL
3 * Copyright (c) 2022 Lodz University of Technology
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, version 3.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 */
14#ifndef PLASK__UTILS_XML_READER_H
15#define PLASK__UTILS_XML_READER_H
16
17#include <string>
18#include <limits>
19#include <boost/lexical_cast.hpp>
20#include "../../optional.hpp"
21#include <boost/algorithm/string.hpp>
22#include <vector>
23#include <set>
24#include <map>
25
26#include <typeinfo>
27#include <typeindex>
28#include <functional>
29#include <type_traits>
30#include <boost/any.hpp>
31
32#include "exceptions.hpp"
33
34// This is copied from expat.h, which allows this header not to include expat.h
35extern "C" {
36 struct XML_ParserStruct;
37 typedef struct XML_ParserStruct *XML_Parser;
38}
39
40namespace plask {
41
48
49 typedef std::function<boost::any(const std::string&)> type_parser;
50
51 std::map<std::type_index, type_parser> parsers;
52
53 void set() {} //do nothing, defined for stop of set recursion and default constructor
54
55public:
56
62 template <typename... Functors>
63 StringInterpreter(Functors... parsers) { set(parsers...); }
64
69
79 template <typename RequiredType>
80 RequiredType get(const std::string& str) const {
81 auto i = parsers.find(std::type_index(typeid((RequiredType*)0)));
82 if (i != parsers.end())
83 return boost::any_cast<RequiredType>(i->second(str));
84 return boost::lexical_cast<RequiredType>(boost::trim_copy(str));
85 }
86
92 template <typename Functor1, typename... Functors>
94 parsers[std::type_index(typeid((typename std::result_of<Functor1(std::string)>::type*)0))] = parser1;
95 set(rest_parsers...);
96 }
97
104 template <typename type>
105 void unset() {
106 parsers.erase(std::type_index(typeid((type*)0)));
107 }
108
109};
110
119 public:
120
/// Types of XML nodes reported by the reader.
/// The values are distinct powers of two; NOTE(review): presumably so that they can be OR-ed
/// into the `int required_types` masks taken by ensureNodeTypeIs/requireNext - confirm.
122 enum NodeType {
123 // NODE_NONE = irr::io::EXN_NONE, //< No XML node. This is usually the node if you did not read anything yet.
124 NODE_ELEMENT = 1 , ///< value 1
125 NODE_ELEMENT_END = 2, ///< value 2
126 NODE_TEXT = 4 ///< value 4
127 // NODE_COMMENT = irr::io::EXN_COMMENT, //< An XML comment like <!-- I am a comment --> or a DTD definition.
128 // NODE_CDATA = irr::io::EXN_CDATA, //< An XML CDATA section like <![CDATA[ this is some CDATA ]]>
129 // NODE_UNKNOWN = irr::io::EXN_UNKNOWN //< Unknown element
130 };
131
134
143 virtual std::size_t read(char* buff, std::size_t buf_size) = 0;
144
146 virtual ~DataSource() {}
147 };
148
151
153 std::unique_ptr<std::istream> input;
154
158 template <typename... Args>
159 StreamDataSource(Args&&... params): input(std::forward<Args>(params)...) {}
160
161 std::size_t read(char* buff, std::size_t buf_size) override;
162
163 };
164
167
169 FILE* desc;
170
174 CFileDataSource(FILE* desc): desc(desc) {}
175
176 ~CFileDataSource() { fclose(desc); }
177
178 std::size_t read(char* buff, std::size_t buf_size) override;
179
180 };
181
182
184 template <typename EnumT>
186
187 protected:
188
190 const std::string attr_name;
192
193 std::map<std::string, EnumT> values;
194 std::string help;
195
196 EnumT parse(std::string value) {
197 if (case_insensitive) boost::to_lower(value);
198 auto found = values.find(value);
199 if (found == values.end())
200 throw XMLBadAttrException(reader, attr_name, value, "one of " + help);
201 return found->second;
202 }
203
204 public:
205
212 EnumAttributeReader(XMLReader& reader, const std::string& attr_name, bool case_sensitive=false):
213 reader(reader), attr_name(attr_name), case_insensitive(!case_sensitive) {}
214
221 EnumAttributeReader& value(std::string key, EnumT val, std::size_t min=std::numeric_limits<std::size_t>::max()) {
222 if (case_insensitive) boost::to_lower(key);
223# ifndef NDEBUG
224 if (values.find(key) != values.end()) throw XMLException(reader, "CODE ERROR: Attribute value \"" + key + "\" already defined.");
225# endif
226 help += values.empty()? "\"" : ", \"";
227 values[key] = val;
228 if (min < key.length()) {
229 std::string skey = key.substr(0, min);
230# ifndef NDEBUG
231 if (values.find(skey) != values.end()) throw XMLException(reader, "CODE ERROR: Attribute value \"" + skey + "\" already defined.");
232# endif
233 values[skey] = val;
234 help += skey; help += "["; help += key.substr(min); help += "]";
235 } else
236 help += key;
237 help += "\"";
238 return *this;
239 }
240
242 EnumT require() {
243 return parse(reader.requireAttribute(attr_name));
244 }
245
251 plask::optional<std::string> value = reader.getAttribute(attr_name);
252 if (!value) return plask::optional<EnumT>();
253 return parse(std::move(*value));
254 }
255
261 EnumT get(EnumT default_value) {
262 plask::optional<std::string> value = reader.getAttribute(attr_name);
263 if (!value) return default_value;
264 return parse(std::move(*value));
265 }
266
267 };
268
279
280 std::set<std::string> namesAlreadySeen;
281
282 public:
283
289 void operator()(const std::string& scope, std::string name) {
290 if (namesAlreadySeen.find(name) != namesAlreadySeen.end()) throw XMLDuplicatedElementException(scope, "tag <" + name + ">");
291 namesAlreadySeen.insert(std::move(name));
292 }
293
299 void operator()(const XMLReader& scope, std::string name) {
300 if (namesAlreadySeen.find(name) != namesAlreadySeen.end()) throw XMLDuplicatedElementException(scope, "tag <" + name + ">");
301 namesAlreadySeen.insert(std::move(name));
302 }
303
304 void operator()(const XMLReader& reader) {
305 this->operator ()(reader, reader.getNodeName());
306 }
307
308 };
309
311 typedef std::function<std::string(const std::string&)> Filter;
312
313 private:
314
315 static void startTag(void *data, const char *element, const char **attribute);
316 static void endTag(void *data, const char *element);
317 static void characterData(void* data, const char *string, int string_len);
318
320 std::unique_ptr<DataSource> source;
321
/**
 * Interpret a string as a value of type @p RequiredType, using the string interpreter of this reader.
 * @param attr_str string to interpret
 * @return result of stringInterpreter.get<RequiredType>(attr_str)
 * @tparam RequiredType type of the requested value
 */
322 template <typename RequiredType>
323 RequiredType parse(const std::string& attr_str) const {
324 return stringInterpreter.get<RequiredType>(attr_str);
325 }
326
327 template <typename RequiredType>
328 RequiredType parse(const std::string& attr_str, const std::string& attr_name) const {
329 try {
331 } catch (...) {
332 throw XMLBadAttrException(*this, attr_name, attr_str);
333 }
334 }
335
339 struct State {
340
342 unsigned lineNr;
343
345 unsigned columnNr;
346
348 std::string text;
349
351 std::map<std::string, std::string> attributes;
352
354 NodeType type;
355
363 State(NodeType type, unsigned lineNr, unsigned columnNr, const std::string& text): lineNr(lineNr), columnNr(columnNr), text(text), type(type) {}
364
369 bool hasWhiteText() {
370 for (std::size_t i = 0; i < text.size(); ++i)
371 if (!isspace(text[i])) return false;
372 return true;
373 }
374 };
375
381 State& appendState(NodeType type, const std::string& text);
382
384 std::deque<State> states;
385
387 XML_Parser parser;
388
390 std::vector<std::string> path;
391
393 mutable std::set<std::string> read_attributes;
394
395public:
398
401
404
410 attributeFilter = f;
411 contentFilter = f;
412 }
413
414private:
415
417 mutable bool check_if_all_attributes_were_read;
418
423 bool hasCurrent() const {
424 if (states.empty()) return false;
425 return states.size() > 1 || states.front().type != NODE_TEXT;
426 }
427
431 void ensureHasCurrent() const {
432 if (!hasCurrent()) throw XMLException("XML reader: no current node (missing first read() call?)");
433 }
434
436 const State& getCurrent() const {
437 return states.front();
438 }
439
445 static bool strToBool(std::string str) {
446 boost::algorithm::to_lower(str);
447 if (str == "yes" || str == "true" || str == "1") return true;
448 else if (str == "no" || str == "false" || str == "0") return false;
449 else throw XMLException("\"" + str + "\" is not valid bool value.");
450 }
451
458 static unsigned strToUnsigned(std::string str) {
459 int value = boost::lexical_cast<int>(boost::trim_copy(str));
460 if (value < 0) throw XMLException("negative value given for unsigned");
461 return unsigned(value);
462 }
463
468 bool readSome();
469
471 void initParser();
472
473 public:
474
479 void throwException(const std::string& msg) const { throw XMLException(*this, msg); }
480
485
490 template <typename... Args>
491 void throwUnexpectedElementException(Args&&... args) const { throw XMLUnexpectedElementException(*this, std::forward<Args>(args)...); }
492
500 NodeType ensureNodeTypeIs(int required_types, const char* new_tag_name = nullptr) const;
501
506 XMLReader(std::unique_ptr<DataSource>&& source);
507
512 XMLReader(std::unique_ptr<std::istream>&& istream);
513
518 XMLReader(const char* file_name);
519
524 XMLReader(FILE* file);
525
526 /*
527 * Construct XML reader to read XML from given @p input stream.
528 * @param input input stream
529 */
530 //XMLReader(std::istream& input);
531
532#if (__cplusplus >= 201103L) || defined(__GXX_EXPERIMENTAL_CXX0X__)
538
544 XMLReader& operator=(XMLReader&& to_move);
545
547 XMLReader(const XMLReader& to_copy) = delete;
548 XMLReader& operator=(const XMLReader& to_copy) = delete;
549#else
550 private:
551 XMLReader(const XMLReader&) {}
552 XMLReader& operator=(const XMLReader&) { return *this; }
553 public:
554#endif
555
557 ~XMLReader();
558
563 void swap(XMLReader& to_swap);
564
569 NodeType getNodeType() const { ensureHasCurrent(); return getCurrent().type; }
570
575 unsigned getLineNr() const { ensureHasCurrent(); return getCurrent().lineNr; }
576
581 unsigned getColumnNr() const { ensureHasCurrent(); return getCurrent().columnNr; }
582
587 bool next();
588
589 /*
590 * Check if node is empty, like <foo />.
591 *
592 * Note that empty nodes are reported by the parser twice: first as NODE_ELEMENT and then as NODE_ELEMENT_END.
593 * So for <foo /> the parser works just like for <foo></foo>, and only this method allows checking which notation was used.
594 * @return if an element is an empty element, like <foo />
595 */
596 //bool isEmptyElement() const { return irrReader->isEmptyElement(); }
597
603 const std::vector<std::string>& getPath() const { return path; }
604
609 std::size_t getLevel() const { return path.size(); }
610
617 std::size_t getAttributeCount() const { ensureHasCurrent(); return getCurrent().attributes.size(); }
618
623 const std::map<std::string, std::string> getAttributes() const;
624
629 void ignoreAttribute(const std::string& name) { getAttribute(name); }
630
634 void ignoreAllAttributes() const { check_if_all_attributes_were_read = false; }
635
641 bool hasAttribute(const std::string& name) const { return bool(getAttribute(name)); }
642
648 template <typename EnumT>
649 EnumAttributeReader<EnumT> enumAttribute(const std::string attr_name, bool case_sensitive=false) {
650 return EnumAttributeReader<EnumT>(*this, attr_name, case_sensitive);
651 }
652
656 void removeAlienNamespaceAttr();
657
664 std::string getNodeName() const;
665
672 inline std::string getTagName() const { return getNodeName(); }
673
678 std::string getTextContent() const;
679
684 template <typename T>
685 inline T getTextContent() const {
686 return parse<T>(getTextContent());
687 }
688
696 template <typename T>
697 inline T getAttribute(const std::string& name, const T& default_value) const {
698 plask::optional<std::string> attr_str = getAttribute(name);
699 if (attr_str) {
700 return parse<T>(*attr_str, name);
701 } else
702 return default_value;
703 }
704
710 plask::optional<std::string> getAttribute(const std::string& name) const;
711
720 template <typename T>
721 inline plask::optional<T> getAttribute(const std::string& name) const {
722 plask::optional<std::string> attr_str = getAttribute(name);
723 if (!attr_str) return plask::optional<T>();
724 return parse<T>(*attr_str, name);
725 }
726
733 std::string requireAttribute(const std::string& attr_name) const;
734
742 template <typename T>
743 inline T requireAttribute(const std::string& name) const {
744 return parse<T>(requireAttribute(name), name);
745 }
746
751 void requireNext();
752
762 NodeType requireNext(int required_types, const char *new_tag_name = nullptr);
763
767 void requireTag();
768
773 void requireTag(const std::string& name);
774
780 bool requireTagOrEnd();
781
788 bool requireTagOrEnd(const std::string &name);
789
793 void requireTagEnd();
794
799 std::string requireText();
800
805 std::string requireTextInCurrentTag();
806
811 template <typename T>
812 inline T requireText() {
813 return parse<T>(requireText());
814 }
815
820 template <typename T>
822 return parse<T>(requireTextInCurrentTag());
823 }
824
831 bool gotoNextOnLevel(std::size_t required_level, NodeType required_type = NODE_ELEMENT);
832
837 bool gotoNextTagOnCurrentLevel();
838
842 void gotoEndOfCurrentTag();
843
844};
845
846} // namespace plask
847
848
849namespace std {
850template<>
851inline void swap(plask::XMLReader& a, plask::XMLReader& b) { a.swap(b); }
852} // namespace std
853
854#endif // PLASK__UTILS_XML_READER_H