Reader.php 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. <?php
  2. declare(strict_types=1);
  3. namespace Sabre\Xml;
  4. use XMLReader;
  5. /**
  6. * The Reader class expands upon PHP's built-in XMLReader.
  7. *
  8. * The intended usage is to assign certain XML elements to PHP classes. These
  9. * need to be registered using the $elementMap public property.
  10. *
  11. * After this is done, a single call to parse() will parse the entire document,
  12. * and delegate sub-sections of the document to element classes.
  13. *
  14. * @copyright Copyright (C) 2009-2015 fruux GmbH (https://fruux.com/).
  15. * @author Evert Pot (http://evertpot.com/)
  16. * @license http://sabre.io/license/ Modified BSD License
  17. */
  18. class Reader extends \XMLReader
  19. {
  20. use ContextStackTrait;
  21. /**
  22. * Returns the current nodename in clark-notation.
  23. *
  24. * For example: "{http://www.w3.org/2005/Atom}feed".
  25. * Or if no namespace is defined: "{}feed".
  26. *
  27. * This method returns null if we're not currently on an element.
  28. *
  29. * @return string|null
  30. */
  31. public function getClark()
  32. {
  33. if (!$this->localName) {
  34. return null;
  35. }
  36. return '{'.$this->namespaceURI.'}'.$this->localName;
  37. }
  38. /**
  39. * Reads the entire document.
  40. *
  41. * This function returns an array with the following three elements:
  42. * * name - The root element name.
  43. * * value - The value for the root element.
  44. * * attributes - An array of attributes.
  45. *
  46. * This function will also disable the standard libxml error handler (which
  47. * usually just results in PHP errors), and throw exceptions instead.
  48. */
  49. public function parse(): array
  50. {
  51. $previousEntityState = null;
  52. $shouldCallLibxmlDisableEntityLoader = (\LIBXML_VERSION < 20900);
  53. if ($shouldCallLibxmlDisableEntityLoader) {
  54. $previousEntityState = libxml_disable_entity_loader(true);
  55. }
  56. $previousSetting = libxml_use_internal_errors(true);
  57. try {
  58. while (self::ELEMENT !== $this->nodeType) {
  59. if (!$this->read()) {
  60. $errors = libxml_get_errors();
  61. libxml_clear_errors();
  62. if ($errors) {
  63. throw new LibXMLException($errors);
  64. }
  65. }
  66. }
  67. $result = $this->parseCurrentElement();
  68. // last line of defense in case errors did occur above
  69. $errors = libxml_get_errors();
  70. libxml_clear_errors();
  71. if ($errors) {
  72. throw new LibXMLException($errors);
  73. }
  74. } finally {
  75. libxml_use_internal_errors($previousSetting);
  76. if ($shouldCallLibxmlDisableEntityLoader) {
  77. libxml_disable_entity_loader($previousEntityState);
  78. }
  79. }
  80. return $result;
  81. }
  82. /**
  83. * parseGetElements parses everything in the current sub-tree,
  84. * and returns an array of elements.
  85. *
  86. * Each element has a 'name', 'value' and 'attributes' key.
  87. *
  88. * If the element didn't contain sub-elements, an empty array is always
  89. * returned. If there was any text inside the element, it will be
  90. * discarded.
  91. *
  92. * If the $elementMap argument is specified, the existing elementMap will
  93. * be overridden while parsing the tree, and restored after this process.
  94. */
  95. public function parseGetElements(?array $elementMap = null): array
  96. {
  97. $result = $this->parseInnerTree($elementMap);
  98. if (!is_array($result)) {
  99. return [];
  100. }
  101. return $result;
  102. }
  103. /**
  104. * Parses all elements below the current element.
  105. *
  106. * This method will return a string if this was a text-node, or an array if
  107. * there were sub-elements.
  108. *
  109. * If there's both text and sub-elements, the text will be discarded.
  110. *
  111. * If the $elementMap argument is specified, the existing elementMap will
  112. * be overridden while parsing the tree, and restored after this process.
  113. *
  114. * @return array|string|null
  115. */
  116. public function parseInnerTree(?array $elementMap = null)
  117. {
  118. $text = null;
  119. $elements = [];
  120. if (self::ELEMENT === $this->nodeType && $this->isEmptyElement) {
  121. // Easy!
  122. $this->next();
  123. return null;
  124. }
  125. if (!is_null($elementMap)) {
  126. $this->pushContext();
  127. $this->elementMap = $elementMap;
  128. }
  129. try {
  130. if (!$this->read()) {
  131. $errors = libxml_get_errors();
  132. libxml_clear_errors();
  133. if ($errors) {
  134. throw new LibXMLException($errors);
  135. }
  136. throw new ParseException('This should never happen (famous last words)');
  137. }
  138. $keepOnParsing = true;
  139. while ($keepOnParsing) {
  140. if (!$this->isValid()) {
  141. $errors = libxml_get_errors();
  142. if ($errors) {
  143. libxml_clear_errors();
  144. throw new LibXMLException($errors);
  145. }
  146. }
  147. switch ($this->nodeType) {
  148. case self::ELEMENT:
  149. $elements[] = $this->parseCurrentElement();
  150. break;
  151. case self::TEXT:
  152. case self::CDATA:
  153. $text .= $this->value;
  154. $this->read();
  155. break;
  156. case self::END_ELEMENT:
  157. // Ensuring we are moving the cursor after the end element.
  158. $this->read();
  159. $keepOnParsing = false;
  160. break;
  161. case self::NONE:
  162. throw new ParseException('We hit the end of the document prematurely. This likely means that some parser "eats" too many elements. Do not attempt to continue parsing.');
  163. default:
  164. // Advance to the next element
  165. $this->read();
  166. break;
  167. }
  168. }
  169. } finally {
  170. if (!is_null($elementMap)) {
  171. $this->popContext();
  172. }
  173. }
  174. return $elements ? $elements : $text;
  175. }
  176. /**
  177. * Reads all text below the current element, and returns this as a string.
  178. */
  179. public function readText(): string
  180. {
  181. $result = '';
  182. $previousDepth = $this->depth;
  183. while ($this->read() && $this->depth != $previousDepth) {
  184. if (in_array($this->nodeType, [\XMLReader::TEXT, \XMLReader::CDATA, \XMLReader::WHITESPACE])) {
  185. $result .= $this->value;
  186. }
  187. }
  188. return $result;
  189. }
  190. /**
  191. * Parses the current XML element.
  192. *
  193. * This method returns arn array with 3 properties:
  194. * * name - A clark-notation XML element name.
  195. * * value - The parsed value.
  196. * * attributes - A key-value list of attributes.
  197. */
  198. public function parseCurrentElement(): array
  199. {
  200. $name = $this->getClark();
  201. $attributes = [];
  202. if ($this->hasAttributes) {
  203. $attributes = $this->parseAttributes();
  204. }
  205. $value = call_user_func(
  206. $this->getDeserializerForElementName((string) $name),
  207. $this
  208. );
  209. return [
  210. 'name' => $name,
  211. 'value' => $value,
  212. 'attributes' => $attributes,
  213. ];
  214. }
  215. /**
  216. * Grabs all the attributes from the current element, and returns them as a
  217. * key-value array.
  218. *
  219. * If the attributes are part of the same namespace, they will simply be
  220. * short keys. If they are defined on a different namespace, the attribute
  221. * name will be returned in clark-notation.
  222. */
  223. public function parseAttributes(): array
  224. {
  225. $attributes = [];
  226. while ($this->moveToNextAttribute()) {
  227. if ($this->namespaceURI) {
  228. // Ignoring 'xmlns', it doesn't make any sense.
  229. if ('http://www.w3.org/2000/xmlns/' === $this->namespaceURI) {
  230. continue;
  231. }
  232. $name = $this->getClark();
  233. $attributes[$name] = $this->value;
  234. } else {
  235. $attributes[$this->localName] = $this->value;
  236. }
  237. }
  238. $this->moveToElement();
  239. return $attributes;
  240. }
  241. /**
  242. * Returns the function that should be used to parse the element identified
  243. * by its clark-notation name.
  244. */
  245. public function getDeserializerForElementName(string $name): callable
  246. {
  247. if (!array_key_exists($name, $this->elementMap)) {
  248. if ('{}' == substr($name, 0, 2) && array_key_exists(substr($name, 2), $this->elementMap)) {
  249. $name = substr($name, 2);
  250. } else {
  251. return ['Sabre\\Xml\\Element\\Base', 'xmlDeserialize'];
  252. }
  253. }
  254. $deserializer = $this->elementMap[$name];
  255. if (is_subclass_of($deserializer, 'Sabre\\Xml\\XmlDeserializable')) {
  256. return [$deserializer, 'xmlDeserialize'];
  257. }
  258. if (is_callable($deserializer)) {
  259. return $deserializer;
  260. }
  261. $type = gettype($deserializer);
  262. if ('string' === $type) {
  263. $type .= ' ('.$deserializer.')';
  264. } elseif ('object' === $type) {
  265. $type .= ' ('.get_class($deserializer).')';
  266. }
  267. throw new \LogicException('Could not use this type as a deserializer: '.$type.' for element: '.$name);
  268. }
  269. }