MimeDir.php 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689
  1. <?php
  2. namespace Sabre\VObject\Parser;
  3. use Sabre\VObject\Component;
  4. use Sabre\VObject\Component\VCalendar;
  5. use Sabre\VObject\Component\VCard;
  6. use Sabre\VObject\Document;
  7. use Sabre\VObject\EofException;
  8. use Sabre\VObject\Node;
  9. use Sabre\VObject\ParseException;
  10. /**
  11. * MimeDir parser.
  12. *
  13. * This class parses iCalendar 2.0 and vCard 2.1, 3.0 and 4.0 files. This
  14. * parser will return one of the following two objects from the parse method:
  15. *
  16. * Sabre\VObject\Component\VCalendar
  17. * Sabre\VObject\Component\VCard
  18. *
  19. * @copyright Copyright (C) fruux GmbH (https://fruux.com/)
  20. * @author Evert Pot (http://evertpot.com/)
  21. * @license http://sabre.io/license/ Modified BSD License
  22. */
  23. class MimeDir extends Parser
  24. {
    /**
     * The input stream.
     *
     * Assigned by setInput(): either the stream resource handed in by the
     * caller, or a php://temp stream wrapping a string input.
     *
     * @var resource
     */
    protected $input;

    /**
     * Root component.
     *
     * Reset to null at the start of parse() and created by parseDocument()
     * once the opening BEGIN: line has been recognised.
     *
     * @var Component
     */
    protected $root;

    /**
     * By default all input will be assumed to be UTF-8.
     *
     * However, both iCalendar and vCard might be encoded using different
     * character sets. The character set is usually set in the mime-type.
     *
     * If this is the case, use setCharset to specify that a different
     * encoding will be used. If this is set, the parser will automatically
     * convert all incoming data to UTF-8.
     *
     * @var string
     */
    protected $charset = 'UTF-8';

    /**
     * The list of character sets we support when decoding.
     *
     * setCharset() only accepts values from this list.
     *
     * This would be a const expression but for now we need to support PHP 5.5
     *
     * @var string[]
     */
    protected static $SUPPORTED_CHARSETS = [
        'UTF-8',
        'ISO-8859-1',
        'Windows-1252',
    ];
  60. /**
  61. * Parses an iCalendar or vCard file.
  62. *
  63. * Pass a stream or a string. If null is parsed, the existing buffer is
  64. * used.
  65. *
  66. * @param string|resource|null $input
  67. * @param int $options
  68. *
  69. * @return \Sabre\VObject\Document
  70. */
  71. public function parse($input = null, $options = 0)
  72. {
  73. $this->root = null;
  74. if (!is_null($input)) {
  75. $this->setInput($input);
  76. }
  77. if (!\is_resource($this->input)) {
  78. // Null was passed as input, but there was no existing input buffer
  79. // There is nothing to parse.
  80. throw new ParseException('No input provided to parse');
  81. }
  82. if (0 !== $options) {
  83. $this->options = $options;
  84. }
  85. $this->parseDocument();
  86. return $this->root;
  87. }
  88. /**
  89. * By default all input will be assumed to be UTF-8.
  90. *
  91. * However, both iCalendar and vCard might be encoded using different
  92. * character sets. The character set is usually set in the mime-type.
  93. *
  94. * If this is the case, use setEncoding to specify that a different
  95. * encoding will be used. If this is set, the parser will automatically
  96. * convert all incoming data to UTF-8.
  97. *
  98. * @param string $charset
  99. */
  100. public function setCharset($charset)
  101. {
  102. if (!in_array($charset, self::$SUPPORTED_CHARSETS)) {
  103. throw new \InvalidArgumentException('Unsupported encoding. (Supported encodings: '.implode(', ', self::$SUPPORTED_CHARSETS).')');
  104. }
  105. $this->charset = $charset;
  106. }
  107. /**
  108. * Sets the input buffer. Must be a string or stream.
  109. *
  110. * @param resource|string $input
  111. */
  112. public function setInput($input)
  113. {
  114. // Resetting the parser
  115. $this->lineIndex = 0;
  116. $this->startLine = 0;
  117. if (is_string($input)) {
  118. // Converting to a stream.
  119. $stream = fopen('php://temp', 'r+');
  120. fwrite($stream, $input);
  121. rewind($stream);
  122. $this->input = $stream;
  123. } elseif (is_resource($input)) {
  124. $this->input = $input;
  125. } else {
  126. throw new \InvalidArgumentException('This parser can only read from strings or streams.');
  127. }
  128. }
  129. /**
  130. * Parses an entire document.
  131. */
  132. protected function parseDocument()
  133. {
  134. $line = $this->readLine();
  135. // BOM is ZERO WIDTH NO-BREAK SPACE (U+FEFF).
  136. // It's 0xEF 0xBB 0xBF in UTF-8 hex.
  137. if (3 <= strlen($line)
  138. && 0xef === ord($line[0])
  139. && 0xbb === ord($line[1])
  140. && 0xbf === ord($line[2])) {
  141. $line = substr($line, 3);
  142. }
  143. switch (strtoupper($line)) {
  144. case 'BEGIN:VCALENDAR':
  145. $class = VCalendar::$componentMap['VCALENDAR'];
  146. break;
  147. case 'BEGIN:VCARD':
  148. $class = VCard::$componentMap['VCARD'];
  149. break;
  150. default:
  151. throw new ParseException('This parser only supports VCARD and VCALENDAR files');
  152. }
  153. $this->root = new $class([], false);
  154. while (true) {
  155. // Reading until we hit END:
  156. try {
  157. $line = $this->readLine();
  158. } catch (EofException $oEx) {
  159. $line = 'END:'.$this->root->name;
  160. }
  161. if ('END:' === strtoupper(substr($line, 0, 4))) {
  162. break;
  163. }
  164. $result = $this->parseLine($line);
  165. if ($result) {
  166. $this->root->add($result);
  167. }
  168. }
  169. $name = strtoupper(substr($line, 4));
  170. if ($name !== $this->root->name) {
  171. throw new ParseException('Invalid MimeDir file. expected: "END:'.$this->root->name.'" got: "END:'.$name.'"');
  172. }
  173. }
  174. /**
  175. * Parses a line, and if it hits a component, it will also attempt to parse
  176. * the entire component.
  177. *
  178. * @param string $line Unfolded line
  179. *
  180. * @return Node
  181. */
  182. protected function parseLine($line)
  183. {
  184. // Start of a new component
  185. if ('BEGIN:' === strtoupper(substr($line, 0, 6))) {
  186. if (substr($line, 6) === $this->root->name) {
  187. throw new ParseException('Invalid MimeDir file. Unexpected component: "'.$line.'" in document type '.$this->root->name);
  188. }
  189. $component = $this->root->createComponent(substr($line, 6), [], false);
  190. while (true) {
  191. // Reading until we hit END:
  192. $line = $this->readLine();
  193. if ('END:' === strtoupper(substr($line, 0, 4))) {
  194. break;
  195. }
  196. $result = $this->parseLine($line);
  197. if ($result) {
  198. $component->add($result);
  199. }
  200. }
  201. $name = strtoupper(substr($line, 4));
  202. if ($name !== $component->name) {
  203. throw new ParseException('Invalid MimeDir file. expected: "END:'.$component->name.'" got: "END:'.$name.'"');
  204. }
  205. return $component;
  206. } else {
  207. // Property reader
  208. $property = $this->readProperty($line);
  209. if (!$property) {
  210. // Ignored line
  211. return false;
  212. }
  213. return $property;
  214. }
  215. }
    /**
     * We need to look ahead 1 line every time to see if we need to 'unfold'
     * the next line.
     *
     * If that was not the case, we store it here. readLine() returns this
     * line first on its next call and resets the buffer to null.
     *
     * @var string|null
     */
    protected $lineBuffer;

    /**
     * The real current line number.
     *
     * Incremented by readLine() as it reads from the input, including the
     * look-ahead read; reset to 0 by setInput().
     *
     * @var int
     */
    protected $lineIndex = 0;

    /**
     * In the case of unfolded lines, this property holds the line number for
     * the start of the line.
     *
     * Used in parse error messages and passed along to createProperty().
     *
     * @var int
     */
    protected $startLine = 0;

    /**
     * Contains a 'raw' representation of the current line.
     *
     * For folded lines the continuation lines are kept, each joined with
     * "\n " (newline plus a single space), which is what
     * extractQuotedPrintableValue() relies on.
     *
     * @var string
     */
    protected $rawLine;
  242. /**
  243. * Reads a single line from the buffer.
  244. *
  245. * This method strips any newlines and also takes care of unfolding.
  246. *
  247. * @throws \Sabre\VObject\EofException
  248. *
  249. * @return string
  250. */
  251. protected function readLine()
  252. {
  253. if (!\is_null($this->lineBuffer)) {
  254. $rawLine = $this->lineBuffer;
  255. $this->lineBuffer = null;
  256. } else {
  257. do {
  258. $eof = \feof($this->input);
  259. $rawLine = \fgets($this->input);
  260. if ($eof || (\feof($this->input) && false === $rawLine)) {
  261. throw new EofException('End of document reached prematurely');
  262. }
  263. if (false === $rawLine) {
  264. throw new ParseException('Error reading from input stream');
  265. }
  266. $rawLine = \rtrim($rawLine, "\r\n");
  267. } while ('' === $rawLine); // Skipping empty lines
  268. ++$this->lineIndex;
  269. }
  270. $line = $rawLine;
  271. $this->startLine = $this->lineIndex;
  272. // Looking ahead for folded lines.
  273. while (true) {
  274. $nextLine = \rtrim(\fgets($this->input), "\r\n");
  275. ++$this->lineIndex;
  276. if (!$nextLine) {
  277. break;
  278. }
  279. if ("\t" === $nextLine[0] || ' ' === $nextLine[0]) {
  280. $curLine = \substr($nextLine, 1);
  281. $line .= $curLine;
  282. $rawLine .= "\n ".$curLine;
  283. } else {
  284. $this->lineBuffer = $nextLine;
  285. break;
  286. }
  287. }
  288. $this->rawLine = $rawLine;
  289. return $line;
  290. }
  291. /**
  292. * Reads a property or component from a line.
  293. */
  294. protected function readProperty($line)
  295. {
  296. if ($this->options & self::OPTION_FORGIVING) {
  297. $propNameToken = 'A-Z0-9\-\._\\/';
  298. } else {
  299. $propNameToken = 'A-Z0-9\-\.';
  300. }
  301. $paramNameToken = 'A-Z0-9\-';
  302. $safeChar = '^";:,';
  303. $qSafeChar = '^"';
  304. $regex = "/
  305. ^(?P<name> [$propNameToken]+ ) (?=[;:]) # property name
  306. |
  307. (?<=:)(?P<propValue> .+)$ # property value
  308. |
  309. ;(?P<paramName> [$paramNameToken]+) (?=[=;:]) # parameter name
  310. |
  311. (=|,)(?P<paramValue> # parameter value
  312. (?: [$safeChar]*) |
  313. \"(?: [$qSafeChar]+)\"
  314. ) (?=[;:,])
  315. /xi";
  316. //echo $regex, "\n"; exit();
  317. preg_match_all($regex, $line, $matches, PREG_SET_ORDER);
  318. $property = [
  319. 'name' => null,
  320. 'parameters' => [],
  321. 'value' => null,
  322. ];
  323. $lastParam = null;
  324. /*
  325. * Looping through all the tokens.
  326. *
  327. * Note that we are looping through them in reverse order, because if a
  328. * sub-pattern matched, the subsequent named patterns will not show up
  329. * in the result.
  330. */
  331. foreach ($matches as $match) {
  332. if (isset($match['paramValue'])) {
  333. if ($match['paramValue'] && '"' === $match['paramValue'][0]) {
  334. $value = substr($match['paramValue'], 1, -1);
  335. } else {
  336. $value = $match['paramValue'];
  337. }
  338. $value = $this->unescapeParam($value);
  339. if (is_null($lastParam)) {
  340. if ($this->options & self::OPTION_IGNORE_INVALID_LINES) {
  341. // When the property can't be matched and the configuration
  342. // option is set to ignore invalid lines, we ignore this line
  343. // This can happen when servers provide faulty data as iCloud
  344. // frequently does with X-APPLE-STRUCTURED-LOCATION
  345. continue;
  346. }
  347. throw new ParseException('Invalid Mimedir file. Line starting at '.$this->startLine.' did not follow iCalendar/vCard conventions');
  348. }
  349. if (is_null($property['parameters'][$lastParam])) {
  350. $property['parameters'][$lastParam] = $value;
  351. } elseif (is_array($property['parameters'][$lastParam])) {
  352. $property['parameters'][$lastParam][] = $value;
  353. } elseif ($property['parameters'][$lastParam] === $value) {
  354. // When the current value of the parameter is the same as the
  355. // new one, then we can leave the current parameter as it is.
  356. } else {
  357. $property['parameters'][$lastParam] = [
  358. $property['parameters'][$lastParam],
  359. $value,
  360. ];
  361. }
  362. continue;
  363. }
  364. if (isset($match['paramName'])) {
  365. $lastParam = strtoupper($match['paramName']);
  366. if (!isset($property['parameters'][$lastParam])) {
  367. $property['parameters'][$lastParam] = null;
  368. }
  369. continue;
  370. }
  371. if (isset($match['propValue'])) {
  372. $property['value'] = $match['propValue'];
  373. continue;
  374. }
  375. if (isset($match['name']) && $match['name']) {
  376. $property['name'] = strtoupper($match['name']);
  377. continue;
  378. }
  379. // @codeCoverageIgnoreStart
  380. throw new \LogicException('This code should not be reachable');
  381. // @codeCoverageIgnoreEnd
  382. }
  383. if (is_null($property['value'])) {
  384. $property['value'] = '';
  385. }
  386. if (!$property['name']) {
  387. if ($this->options & self::OPTION_IGNORE_INVALID_LINES) {
  388. return false;
  389. }
  390. throw new ParseException('Invalid Mimedir file. Line starting at '.$this->startLine.' did not follow iCalendar/vCard conventions');
  391. }
  392. // vCard 2.1 states that parameters may appear without a name, and only
  393. // a value. We can deduce the value based on its name.
  394. //
  395. // Our parser will get those as parameters without a value instead, so
  396. // we're filtering these parameters out first.
  397. $namedParameters = [];
  398. $namelessParameters = [];
  399. foreach ($property['parameters'] as $name => $value) {
  400. if (!is_null($value)) {
  401. $namedParameters[$name] = $value;
  402. } else {
  403. $namelessParameters[] = $name;
  404. }
  405. }
  406. $propObj = $this->root->createProperty($property['name'], null, $namedParameters, null, $this->startLine, $line);
  407. foreach ($namelessParameters as $namelessParameter) {
  408. $propObj->add(null, $namelessParameter);
  409. }
  410. if (isset($propObj['ENCODING']) && 'QUOTED-PRINTABLE' === strtoupper($propObj['ENCODING'])) {
  411. $propObj->setQuotedPrintableValue($this->extractQuotedPrintableValue());
  412. } else {
  413. $charset = $this->charset;
  414. if (Document::VCARD21 === $this->root->getDocumentType() && isset($propObj['CHARSET'])) {
  415. // vCard 2.1 allows the character set to be specified per property.
  416. $charset = (string) $propObj['CHARSET'];
  417. }
  418. switch (strtolower($charset)) {
  419. case 'utf-8':
  420. break;
  421. case 'windows-1252':
  422. case 'iso-8859-1':
  423. $property['value'] = mb_convert_encoding($property['value'], 'UTF-8', $charset);
  424. break;
  425. default:
  426. throw new ParseException('Unsupported CHARSET: '.$propObj['CHARSET']);
  427. }
  428. $propObj->setRawMimeDirValue($property['value']);
  429. }
  430. return $propObj;
  431. }
  432. /**
  433. * Unescapes a property value.
  434. *
  435. * vCard 2.1 says:
  436. * * Semi-colons must be escaped in some property values, specifically
  437. * ADR, ORG and N.
  438. * * Semi-colons must be escaped in parameter values, because semi-colons
  439. * are also use to separate values.
  440. * * No mention of escaping backslashes with another backslash.
  441. * * newlines are not escaped either, instead QUOTED-PRINTABLE is used to
  442. * span values over more than 1 line.
  443. *
  444. * vCard 3.0 says:
  445. * * (rfc2425) Backslashes, newlines (\n or \N) and comma's must be
  446. * escaped, all time time.
  447. * * Comma's are used for delimiters in multiple values
  448. * * (rfc2426) Adds to to this that the semi-colon MUST also be escaped,
  449. * as in some properties semi-colon is used for separators.
  450. * * Properties using semi-colons: N, ADR, GEO, ORG
  451. * * Both ADR and N's individual parts may be broken up further with a
  452. * comma.
  453. * * Properties using commas: NICKNAME, CATEGORIES
  454. *
  455. * vCard 4.0 (rfc6350) says:
  456. * * Commas must be escaped.
  457. * * Semi-colons may be escaped, an unescaped semi-colon _may_ be a
  458. * delimiter, depending on the property.
  459. * * Backslashes must be escaped
  460. * * Newlines must be escaped as either \N or \n.
  461. * * Some compound properties may contain multiple parts themselves, so a
  462. * comma within a semi-colon delimited property may also be unescaped
  463. * to denote multiple parts _within_ the compound property.
  464. * * Text-properties using semi-colons: N, ADR, ORG, CLIENTPIDMAP.
  465. * * Text-properties using commas: NICKNAME, RELATED, CATEGORIES, PID.
  466. *
  467. * Even though the spec says that commas must always be escaped, the
  468. * example for GEO in Section 6.5.2 seems to violate this.
  469. *
  470. * iCalendar 2.0 (rfc5545) says:
  471. * * Commas or semi-colons may be used as delimiters, depending on the
  472. * property.
  473. * * Commas, semi-colons, backslashes, newline (\N or \n) are always
  474. * escaped, unless they are delimiters.
  475. * * Colons shall not be escaped.
  476. * * Commas can be considered the 'default delimiter' and is described as
  477. * the delimiter in cases where the order of the multiple values is
  478. * insignificant.
  479. * * Semi-colons are described as the delimiter for 'structured values'.
  480. * They are specifically used in Semi-colons are used as a delimiter in
  481. * REQUEST-STATUS, RRULE, GEO and EXRULE. EXRULE is deprecated however.
  482. *
  483. * Now for the parameters
  484. *
  485. * If delimiter is not set (empty string) this method will just return a string.
  486. * If it's a comma or a semi-colon the string will be split on those
  487. * characters, and always return an array.
  488. *
  489. * @param string $input
  490. * @param string $delimiter
  491. *
  492. * @return string|string[]
  493. */
  494. public static function unescapeValue($input, $delimiter = ';')
  495. {
  496. $regex = '# (?: (\\\\ (?: \\\\ | N | n | ; | , ) )';
  497. if ($delimiter) {
  498. $regex .= ' | ('.$delimiter.')';
  499. }
  500. $regex .= ') #x';
  501. $matches = preg_split($regex, $input, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
  502. $resultArray = [];
  503. $result = '';
  504. foreach ($matches as $match) {
  505. switch ($match) {
  506. case '\\\\':
  507. $result .= '\\';
  508. break;
  509. case '\N':
  510. case '\n':
  511. $result .= "\n";
  512. break;
  513. case '\;':
  514. $result .= ';';
  515. break;
  516. case '\,':
  517. $result .= ',';
  518. break;
  519. case $delimiter:
  520. $resultArray[] = $result;
  521. $result = '';
  522. break;
  523. default:
  524. $result .= $match;
  525. break;
  526. }
  527. }
  528. $resultArray[] = $result;
  529. return $delimiter ? $resultArray : $result;
  530. }
  531. /**
  532. * Unescapes a parameter value.
  533. *
  534. * vCard 2.1:
  535. * * Does not mention a mechanism for this. In addition, double quotes
  536. * are never used to wrap values.
  537. * * This means that parameters can simply not contain colons or
  538. * semi-colons.
  539. *
  540. * vCard 3.0 (rfc2425, rfc2426):
  541. * * Parameters _may_ be surrounded by double quotes.
  542. * * If this is not the case, semi-colon, colon and comma may simply not
  543. * occur (the comma used for multiple parameter values though).
  544. * * If it is surrounded by double-quotes, it may simply not contain
  545. * double-quotes.
  546. * * This means that a parameter can in no case encode double-quotes, or
  547. * newlines.
  548. *
  549. * vCard 4.0 (rfc6350)
  550. * * Behavior seems to be identical to vCard 3.0
  551. *
  552. * iCalendar 2.0 (rfc5545)
  553. * * Behavior seems to be identical to vCard 3.0
  554. *
  555. * Parameter escaping mechanism (rfc6868) :
  556. * * This rfc describes a new way to escape parameter values.
  557. * * New-line is encoded as ^n
  558. * * ^ is encoded as ^^.
  559. * * " is encoded as ^'
  560. *
  561. * @param string $input
  562. */
  563. private function unescapeParam($input)
  564. {
  565. return
  566. preg_replace_callback(
  567. '#(\^(\^|n|\'))#',
  568. function ($matches) {
  569. switch ($matches[2]) {
  570. case 'n':
  571. return "\n";
  572. case '^':
  573. return '^';
  574. case '\'':
  575. return '"';
  576. // @codeCoverageIgnoreStart
  577. }
  578. // @codeCoverageIgnoreEnd
  579. },
  580. $input
  581. );
  582. }
  583. /**
  584. * Gets the full quoted printable value.
  585. *
  586. * We need a special method for this, because newlines have both a meaning
  587. * in vCards, and in QuotedPrintable.
  588. *
  589. * This method does not do any decoding.
  590. *
  591. * @return string
  592. */
  593. private function extractQuotedPrintableValue()
  594. {
  595. // We need to parse the raw line again to get the start of the value.
  596. //
  597. // We are basically looking for the first colon (:), but we need to
  598. // skip over the parameters first, as they may contain one.
  599. $regex = '/^
  600. (?: [^:])+ # Anything but a colon
  601. (?: "[^"]")* # A parameter in double quotes
  602. : # start of the value we really care about
  603. (.*)$
  604. /xs';
  605. preg_match($regex, $this->rawLine, $matches);
  606. $value = $matches[1];
  607. // Removing the first whitespace character from every line. Kind of
  608. // like unfolding, but we keep the newline.
  609. $value = str_replace("\n ", "\n", $value);
  610. // Microsoft products don't always correctly fold lines, they may be
  611. // missing a whitespace. So if 'forgiving' is turned on, we will take
  612. // those as well.
  613. if ($this->options & self::OPTION_FORGIVING) {
  614. while ('=' === substr($value, -1) && $this->lineBuffer) {
  615. // Reading the line
  616. $this->readLine();
  617. // Grabbing the raw form
  618. $value .= "\n".$this->rawLine;
  619. }
  620. }
  621. return $value;
  622. }
  623. }