functions.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. <?php
  2. declare(strict_types=1);
  3. namespace Sabre\Uri;
  4. /**
  5. * This file contains all the uri handling functions.
  6. *
  7. * @copyright Copyright (C) fruux GmbH (https://fruux.com/)
  8. * @author Evert Pot (http://evertpot.com/)
  9. * @license http://sabre.io/license/
  10. */
  /**
   * Resolves relative urls, like a browser would.
   *
   * This function takes a basePath, which itself _may_ also be relative, and
   * then applies the relative path on top of it.
   *
   * The merge follows the reference-resolution rules of RFC 3986 section 5.2.2:
   *
   *  - a reference that has its own scheme is returned on its own;
   *  - a reference that has its own host replaces the whole authority of the
   *    base (host, port and user travel together) and inherits neither the
   *    path nor the query of the base;
   *  - a reference whose path starts with "/" replaces the base path;
   *  - any other non-empty path is merged with the directory of the base path;
   *  - a reference without a path keeps the base path, and also keeps the base
   *    query unless it brings a non-empty query of its own.
   *
   * "." and ".." segments are removed from the resulting path, and the
   * resulting path always starts with a "/".
   *
   * @param string $basePath the URI (absolute or relative) to resolve against
   * @param string $newPath  the reference to apply on top of $basePath
   *
   * @return string the resolved URI
   *
   * @throws InvalidUriException
   */
  function resolve(string $basePath, string $newPath): string
  {
      $delta = parse($newPath);

      // If the new path defines a scheme, it's absolute and we can just return
      // that.
      if (null !== $delta['scheme']) {
          return build($delta);
      }

      $base = parse($basePath);

      // The authority is inherited or replaced as a single unit. Picking host,
      // port and user independently would let the port of the base leak onto
      // the host of the reference.
      $hasOwnAuthority = null !== $delta['host'];
      $authority = $hasOwnAuthority ? $delta : $base;

      $newParts = [];
      $newParts['scheme'] = $base['scheme'];
      $newParts['host'] = $authority['host'];
      $newParts['port'] = $authority['port'];
      $newParts['user'] = $authority['user'];

      $deltaPath = $delta['path'];
      // parse_url() reports a missing path as null, the fallback parser (and an
      // empty reference) report it as ''. Both mean "no path given".
      $hasNoPath = null === $deltaPath || '' === $deltaPath;

      if ($hasOwnAuthority) {
          // A reference with its own authority never inherits the base path.
          $path = (string) $deltaPath;
      } elseif ($hasNoPath) {
          $path = $base['path'] ?? '/';
          if ('' === $path) {
              $path = '/';
          }
      } elseif ('/' === $deltaPath[0]) {
          // Absolute path: replaces the base path entirely.
          $path = $deltaPath;
      } else {
          // Relative path: drop the last component of the base path and append
          // the new path to what is left.
          $path = (string) $base['path'];
          $length = strrpos($path, '/');
          if (false !== $length) {
              $path = substr($path, 0, $length);
          }
          $path .= '/'.$deltaPath;
      }

      // Removing .. and .
      $newPathParts = [];
      foreach (explode('/', $path) as $pathPart) {
          if ('.' === $pathPart) {
              continue;
          }
          if ('..' === $pathPart) {
              array_pop($newPathParts);
              continue;
          }
          $newPathParts[] = $pathPart;
      }
      $path = implode('/', $newPathParts);

      // The resulting path must always be rooted.
      $newParts['path'] = 0 === strpos($path, '/') ? $path : '/'.$path;

      // From PHP 8, no "?" query at all causes 'query' to be null.
      // An empty query "http://example.com/foo?" causes 'query' to be the empty string
      if (null !== $delta['query'] && '' !== $delta['query']) {
          $newParts['query'] = $delta['query'];
      } elseif (isset($base['query']) && !$hasOwnAuthority && $hasNoPath) {
          // Keep the old query if host and path didn't change
          $newParts['query'] = $base['query'];
      }

      // From PHP 8, no "#" fragment at all causes 'fragment' to be null.
      // An empty fragment "http://example.com/foo#" causes 'fragment' to be the empty string
      if (null !== $delta['fragment'] && '' !== $delta['fragment']) {
          $newParts['fragment'] = $delta['fragment'];
      }

      return build($newParts);
  }
  /**
   * Normalizes a URI or partial URI so that it can be compared to other
   * normalized URIs.
   *
   * The following steps are applied:
   *
   *  - "." and ".." segments are resolved in the path;
   *  - every path segment is percent-decoded and encoded again, which turns
   *    for instance %7E into a tilde and %3a into %3A;
   *  - a path that is present is always returned with a leading "/";
   *  - the scheme and the host are lower-cased;
   *  - port 80 for http and port 443 for https are dropped;
   *  - an http or https URI without a path gets "/" as its path.
   *
   * @param string $uri the URI to normalize
   *
   * @return string the normalized URI
   *
   * @throws InvalidUriException
   */
  function normalize(string $uri): string
  {
      $parts = parse($uri);

      if (null !== $parts['path']) {
          $segments = [];
          foreach (explode('/', ltrim($parts['path'], '/')) as $segment) {
              if ('.' === $segment) {
                  // Current directory: nothing to add.
                  continue;
              }
              if ('..' === $segment) {
                  // Parent directory: forget the previous segment.
                  array_pop($segments);
                  continue;
              }
              // Decoding and encoding again gives one canonical
              // percent-encoding for the segment.
              $segments[] = rawurlencode(rawurldecode($segment));
          }
          $parts['path'] = '/'.implode('/', $segments);
      }

      if (null !== $parts['scheme']) {
          $scheme = strtolower($parts['scheme']);
          $parts['scheme'] = $scheme;

          $wellKnownPorts = [
              'http' => '80',
              'https' => '443',
          ];

          // The port is compared loosely on purpose: the table holds strings
          // while a parsed port is an integer.
          if (
              null !== $parts['port']
              && isset($wellKnownPorts[$scheme])
              && $wellKnownPorts[$scheme] == $parts['port']
          ) {
              unset($parts['port']);
          }

          // In http(s) an absent path is equivalent to "/".
          $isHttp = 'http' === $scheme || 'https' === $scheme;
          if ($isHttp && null === $parts['path']) {
              $parts['path'] = '/';
          }
      }

      if (null !== $parts['host']) {
          $parts['host'] = strtolower($parts['host']);
      }

      return build($parts);
  }
  /**
   * Splits a URI into its individual components.
   *
   * The result is what PHP's parse_url produces, completed with a null value
   * for each of the keys scheme, host, path, port, user, query and fragment
   * that parse_url did not set. This way callers never have to check whether
   * one of those keys exists.
   *
   * Before parsing, every non-ascii character in the URI is percent-encoded,
   * because parse_url may corrupt such characters.
   *
   * When parse_url rejects the URI, the pure-PHP _parse_fallback() is used
   * instead.
   *
   * In the return array, key "port" is an int value. Other keys have a string value.
   * "Unused" keys have value null.
   *
   * @return array{scheme: string|null, host: string|null, path: string|null, port: positive-int|null, user: string|null, query: string|null, fragment: string|null}
   *
   * @throws InvalidUriException
   */
  function parse(string $uri): array
  {
      // Percent-encode anything outside of the ascii range first.
      $encoded = preg_replace_callback(
          '/[^[:ascii:]]/u',
          static function ($match) {
              return rawurlencode($match[0]);
          },
          $uri
      );

      // preg_replace_callback yields null on error.
      if (null === $encoded) {
          throw new InvalidUriException('Invalid, or could not parse URI');
      }

      $components = parse_url($encoded);
      if (false === $components) {
          $components = _parse_fallback($encoded);
      }

      $defaults = [
          'scheme' => null,
          'host' => null,
          'path' => null,
          'port' => null,
          'user' => null,
          'query' => null,
          'fragment' => null,
      ];

      /*
       * The array union keeps every parsed value and only adds the defaults
       * for keys that are missing. phpstan cannot follow how the result array
       * is put together, the precise shape is given in the PHPdoc above.
       *
       * @phpstan-ignore-next-line
       */
      return $components + $defaults;
  }
  /**
   * Assembles a URI from components such as the ones returned by PHP's
   * parse_url.
   *
   * The keys that are read are scheme, host, user, port, path, query and
   * fragment. A key that is missing or null is left out of the result. The
   * user and the port are only used when a host is given.
   *
   * The "//" authority marker is written when the authority is not empty, and
   * also for the "file" scheme, so that "file:///path" can be produced.
   *
   * @param array<string, int|string|null> $parts
   *
   * @return string the assembled URI
   */
  function build(array $parts): string
  {
      $hasScheme = isset($parts['scheme']);

      // authority = [user "@"] host [":" port]
      $authority = '';
      if (isset($parts['host'])) {
          $userPrefix = isset($parts['user']) ? $parts['user'].'@' : '';
          $portSuffix = isset($parts['port']) ? ':'.$parts['port'] : '';
          $authority = $userPrefix.$parts['host'].$portSuffix;
      }

      $uri = $hasScheme ? $parts['scheme'].':' : '';

      // A file URI keeps its "//" even when the authority is empty.
      if ('' !== $authority || ($hasScheme && 'file' === $parts['scheme'])) {
          $uri .= '//'.$authority;
      }

      // The remaining components are appended in order, each one preceded by
      // its delimiter.
      $delimiters = [
          'path' => '',
          'query' => '?',
          'fragment' => '#',
      ];
      foreach ($delimiters as $key => $delimiter) {
          if (isset($parts[$key])) {
              $uri .= $delimiter.$parts[$key];
          }
      }

      return $uri;
  }
  /**
   * Splits a path into its 'dirname' and its 'basename'.
   *
   * PHP's own basename() and dirname() are not used here because basename()
   * is locale aware (its behaviour depends on whether the C locale or a UTF-8
   * locale is active), and because both functions are platform aware and treat
   * the backslash (\) as a directory separator on Windows. This function only
   * operates on UTF-8 characters and only splits on "/".
   *
   * The two components are returned as a 2-element array: [dirname, basename].
   * If there is no dirname, it is returned as an empty string. One "/" at the
   * end of the path is stripped off.
   *
   * When the path does not match the expected form (for instance when it is
   * empty or only consists of "/"), both elements are null.
   *
   * @return array<int, mixed>
   */
  function split(string $path): array
  {
      $found = [];
      $matched = preg_match('/^(?:(?:(.*)(?:\/+))?([^\/]+))(?:\/?)$/u', $path, $found);

      if (1 !== $matched) {
          return [null, null];
      }

      // Group 1 is everything before the last run of slashes, group 2 is the
      // last path segment.
      $dirname = $found[1];
      $basename = $found[2];

      return [$dirname, $basename];
  }
  /**
   * This function is another implementation of parse_url, except this one is
   * fully written in PHP.
   *
   * It is only called when PHP's own parse_url refuses a URI. It is fairly
   * crude and probably slow, so the native parse_url is preferred whenever it
   * succeeds.
   *
   * Parsing happens in this order:
   *
   *  1. non-ascii characters are percent-encoded;
   *  2. a leading "scheme:" is taken off;
   *  3. everything after the first "#" becomes the fragment;
   *  4. everything after the first "?" becomes the query;
   *  5. what is left is either "///path" (empty host), "//authority[/path]"
   *     or just a path.
   *
   * Every key listed in the return type is always present. When the authority
   * contains a password, it is returned under an additional "pass" key.
   *
   * @return array{scheme: string|null, host: string|null, path: string|null, port: positive-int|null, user: string|null, query: string|null, fragment: string|null}
   *
   * @throws InvalidUriException
   */
  function _parse_fallback(string $uri): array
  {
      // Normally a URI must be ASCII. However, often it's not and
      // parse_url might corrupt these strings.
      //
      // For that reason we take any non-ascii characters from the uri and
      // uriencode them first.
      $uri = preg_replace_callback(
          '/[^[:ascii:]]/u',
          function ($matches) {
              return rawurlencode($matches[0]);
          },
          $uri
      );

      if (null === $uri) {
          throw new InvalidUriException('Invalid, or could not parse URI');
      }

      $result = [
          'scheme' => null,
          'host' => null,
          'port' => null,
          'user' => null,
          'path' => null,
          'fragment' => null,
          'query' => null,
      ];

      // The hyphen is placed last in the character class so that it is a
      // literal. Written as "+-\." it would form a range from "+" to "." and
      // would also accept a comma as a scheme character.
      if (1 === preg_match('% ^([A-Za-z][A-Za-z0-9+.-]+): %x', $uri, $matches)) {
          $result['scheme'] = $matches[1];
          // Take what's left, which may be nothing at all.
          $uri = (string) substr($uri, strlen($result['scheme']) + 1);
      }

      // Taking off a fragment part
      if (false !== strpos($uri, '#')) {
          list($uri, $result['fragment']) = explode('#', $uri, 2);
      }

      // Taking off the query part
      if (false !== strpos($uri, '?')) {
          list($uri, $result['query']) = explode('?', $uri, 2);
      }

      if ('///' === substr($uri, 0, 3)) {
          // The triple slash uris are a bit unusual, but we have special handling
          // for them: the host is empty and the path starts at the third slash.
          $result['path'] = (string) substr($uri, 2);
          $result['host'] = '';
      } elseif ('//' === substr($uri, 0, 2)) {
          // Uris that have an authority part.
          //
          // The password is optional, so that "user@host" is recognised as
          // well as "user:pass@host". Neither the user nor the password may
          // contain a "/", otherwise an "@" that appears in the path would be
          // taken for the user-info delimiter.
          $regex = '%^
            //
            (?: (?<user> [^:@/]+) (?: : (?<pass> [^@/]+))? @)?
            (?<host> ( [^:/]* | \[ [^\]]+ \] ))
            (?: : (?<port> [0-9]+))?
            (?<path> / .*)?
            $%x';

          if (1 !== preg_match($regex, $uri, $matches)) {
              throw new InvalidUriException('Invalid, or could not parse URI');
          }

          // A group that did not take part in the match is either missing or
          // an empty string, which is why both are checked below.
          if (isset($matches['host']) && '' !== $matches['host']) {
              $result['host'] = $matches['host'];
          }
          if (isset($matches['port'])) {
              $port = (int) $matches['port'];
              // Only positive ports are reported; "" and "0" both end up as 0.
              if ($port > 0) {
                  $result['port'] = $port;
              }
          }
          if (isset($matches['path'])) {
              $result['path'] = $matches['path'];
          }
          if (isset($matches['user']) && '' !== $matches['user']) {
              $result['user'] = $matches['user'];
          }
          if (isset($matches['pass']) && '' !== $matches['pass']) {
              $result['pass'] = $matches['pass'];
          }
      } else {
          // No authority: all that is left is the path.
          $result['path'] = $uri;
      }

      return $result;
  }