PageRenderTime 65ms CodeModel.GetById 31ms RepoModel.GetById 0ms app.codeStats 0ms

/source/src/System/Classes/url.cpp

http://itexmacs.googlecode.com/
C++ | 885 lines | 672 code | 92 blank | 121 comment | 284 complexity | e584a20b2aa04a9397f11e21df402a1b MD5 | raw file
Possible License(s): GPL-3.0, GPL-2.0, MPL-2.0-no-copyleft-exception, LGPL-2.0
  1. /******************************************************************************
  2. * MODULE : url.cpp
  3. * DESCRIPTION: unified resource location handling
  4. * COPYRIGHT : (C) 1999 Joris van der Hoeven
  5. *******************************************************************************
  6. * The url class uses a tree representation for urls.
  7. * This allows us to generalize the concept of an url and allow paths and
  8. * patterns to be regarded as urls too. An url is either a string or a tuple
  9. * of one of the following types:
  10. * "." -- here
  11. * ".." -- parent
  12. * none -- invalid url
  13. * concat -- a/b/c is represented as (concat "a" (concat "b" "c"));
  14. * or -- the path a:b/c is represented as (or "a" (concat "b" "c"));
  15. * root -- the url http://gnu.org yields (concat (root "http") "gnu.org");
  16. * wildcard -- (wildcard) corresponds to any url, (wildcard "*.tm")
  17. * to all strings which end with .tm and (wildcard "*.tm" "file")
  18. * to all TeXmacs files (i.e. discarding directories ending with .tm).
  19. *******************************************************************************
  20. * There are three main types of urls:
  21. * - rootless urls, like a/b/c. These urls are mainly used in computations.
  22. * For example, they can be appended to another url.
  23. * - Standard rooted urls, like file:///usr or http://www.texmacs.org.
  24. * These are the same as those used on the web.
  25. * - System urls, characterized by a "default" root.
  26. * These urls are similar to standard rooted urls, but they behave
  27. * in a slightly different way with respect to concatenation.
  28. * For instance http://www.texmacs.org/Web * file:///tmp would yield
  29. * file:///tmp, whereas http://www.texmacs.org/Web * /tmp yields
  30. * http://www.texmacs.org/tmp
  31. *******************************************************************************
  32. * There are several formats for parsing (and printing) urls:
  33. * - System format: the usual format on your operating system.
  34. * On unix systems "/usr/bin:/usr/local/bin" would be a valid url
  35. * representing a path and on windows systems "c:\windows;c:\TeXmacs"
  36. * would be OK.
  37. * - Unix format: this format forces unix-like notation even for
  38. * other systems like Windows. This is convenient for url's in
  39. * the source code. Unix environment variables like ~ and $TEXMACS_PATH
  40. * can also be part of the url.
  41. * - Standard format: the format which is used on the web.
  42. * Notice that ftp://www.texmacs.org/pub and ftp://www.texmacs.org/pub/
  43. * represent different urls. The second one is represented by concatenating
  44. * on the right with an empty name.
  45. *******************************************************************************
  46. * When an explicit operation on urls needs to be performed,
  47. * like reading a file, the url is first "resolved" into a simple url
  48. * with a unique name (modulo symbolic links) for the resource.
  49. * Next, the url is "concretized" as a file name which is understood
  50. * by the operating system. This may require searching the file from the web.
  51. * Concretized urls should be used quickly and not memorized,
  52. * since such names may be the names of temporary files,
  53. * which may be destroyed soon afterwards.
  54. *******************************************************************************
  55. * This software falls under the GNU general public license version 3 or later.
  56. * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
  57. * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
  58. ******************************************************************************/
  59. #include "boot.hpp"
  60. #include "url.hpp"
  61. #include "sys_utils.hpp"
  62. #include "web_files.hpp"
  63. #include "file.hpp"
  64. #include "analyze.hpp"
  65. #include <ctype.h>
  // Windows builds (OS_WIN32 or MinGW) select Windows path syntax.
  #if defined (OS_WIN32) || defined (__MINGW32__)
  #define WINPATHS
  #endif

  // URL_CONCATER joins the components of a single file name;
  // URL_SEPARATOR separates the entries of a search path.
  #if defined (WINPATHS)
  #define URL_CONCATER '\\'
  #define URL_SEPARATOR ';'
  #else
  #define URL_CONCATER '/'
  #define URL_SEPARATOR ':'
  #endif
  76. /******************************************************************************
  77. * Unrooted url constructors
  78. ******************************************************************************/
  79. static url
  80. url_get_atom (string s, int type) {
  81. if (type < URL_STANDARD) {
  82. if (s == "~") return url_system (get_env ("HOME"));
  83. if (starts (s, "$")) {
  84. string val= get_env (s (1, N(s)));
  85. if (val == "") return url_none ();
  86. return unblank (url_system (val));
  87. }
  88. }
  89. return as_url (tree (s));
  90. }
  91. static url
  92. url_get_name (string s, int type= URL_STANDARD, int i=0) {
  93. char sep= (type == URL_SYSTEM)? URL_CONCATER: '/';
  94. int start= i, n= N(s);
  95. while ((i<n) && (s[i] != sep) && (s[i] != '/')) i++;
  96. url u= url_get_atom (s (start, i), type);
  97. // url u= tree (s (start, i));
  98. if (i == n) return u;
  99. if (start == i) return url_get_name (s, type, i+1);
  100. return u * url_get_name (s, type, i+1);
  101. }
  102. static url
  103. url_get_path (string s, int type= URL_STANDARD, int i=0) {
  104. char sep= (type == URL_SYSTEM)? URL_SEPARATOR: ':';
  105. int start= i, n= N(s);
  106. if (i == n) return url_none ();
  107. while ((i<n) && (s[i] != sep)) i++;
  108. url u= url_general (s (start, i), type);
  109. if (i == n) return u;
  110. if (start == i) return url_get_path (s, type, i+1);
  111. return u | url_get_path (s, type, i+1);
  112. }
  113. /******************************************************************************
  114. * Rooted url constructors
  115. ******************************************************************************/
  116. url
  117. url_root (string protocol) {
  118. return as_url (tuple ("root", protocol));
  119. }
  120. url
  121. url_ramdisc (string contents) {
  122. return as_url (tuple ("root", "ramdisc", contents));
  123. }
  124. static url
  125. url_default (string name, int type= URL_SYSTEM) {
  126. url u= url_get_name (name, type);
  127. #ifdef WINPATHS
  128. // FIXME: this hack seems a bit too simple
  129. if (is_concat (u) && (u[2]->t == "")) u= u[1];
  130. // cout << name << " -> " << url_root ("default") * u << "\n";
  131. return url_root ("default") * u;
  132. #else
  133. if (u->t == "") return url_root ("default");
  134. return url_root ("default") * u;
  135. #endif
  136. }
  137. static url
  138. url_path (string s, int type= URL_SYSTEM) {
  139. url u= url_get_path (s, type);
  140. return u;
  141. }
  142. static url
  143. url_local (string name) {
  144. url u= url_get_name (name, URL_SYSTEM);
  145. return reroot (u, "file");
  146. }
  147. static url
  148. url_file (string name) {
  149. url u= url_get_name (name);
  150. return url_root ("file") * u;
  151. }
  152. static url
  153. url_http (string name) {
  154. url u= url_get_name (name);
  155. return url_root ("http") * u;
  156. }
  157. static url
  158. url_ftp (string name) {
  159. url u= url_get_name (name);
  160. return url_root ("ftp") * u;
  161. }
  162. static url
  163. url_tmfs (string name) {
  164. url u= url_get_name (name);
  165. return url_root ("tmfs") * u;
  166. }
  167. /******************************************************************************
  168. * Generic url constructor
  169. ******************************************************************************/
  170. static bool
  171. heuristic_is_path (string name, int type) {
  172. char sep= (type==0)? URL_SEPARATOR: ':';
  173. int i, n= N(name);
  174. for (i=0; i<n; i++)
  175. if (name[i] == sep)
  176. return true;
  177. return false;
  178. }
  179. static bool
  180. heuristic_is_default (string name, int type) {
  181. #ifdef WINPATHS
  182. // FIXME: we probably should take into account 'type' too
  183. if (N(name) < 2) return false;
  184. if ((name[0] == '\\') && (name[1] == '\\')) return true;
  185. return
  186. isalpha (name[0]) && (name[1] == ':') &&
  187. ((N(name)==2) || (name[2] == '\\') || (name[2] == '/'));
  188. #else
  189. char sep= (type==0)? URL_CONCATER: '/';
  190. return (name != "") && (name[0] == sep);
  191. #endif
  192. }
  193. static bool
  194. heuristic_is_http (string name) {
  195. return starts (name, "www.");
  196. // FIXME: we might want to recognize some other ones like google.com too
  197. }
  198. static bool
  199. heuristic_is_ftp (string name) {
  200. return starts (name, "ftp.");
  201. }
  202. url
  203. url_general (string name, int type= URL_SYSTEM) {
  204. if (starts (name, "local:")) return url_local (name (6, N (name)));
  205. if (starts (name, "file://")) return url_file (name (7, N (name)));
  206. if (starts (name, "http://")) return url_http (name (7, N (name)));
  207. if (starts (name, "ftp://")) return url_ftp (name (6, N (name)));
  208. if (starts (name, "tmfs://")) return url_tmfs (name (7, N (name)));
  209. if (heuristic_is_path (name, type)) return url_path (name, type);
  210. if (heuristic_is_default (name, type)) return url_default (name, type);
  211. if (heuristic_is_http (name)) return url_http (name);
  212. if (heuristic_is_ftp (name)) return url_ftp (name);
  213. return url_get_name (name, type);
  214. }
  215. url
  216. url_unix (string name) {
  217. return url_general (name, URL_UNIX);
  218. }
  219. url
  220. url_unix (string dir, string name) {
  221. return url_unix (dir) * url_unix (name);
  222. }
  223. url
  224. url_system (string name) {
  225. return url_general (name, URL_SYSTEM);
  226. }
  227. url
  228. url_system (string dir, string name) {
  229. return url_system (dir) * url_system (name);
  230. }
  231. url
  232. url_standard (string name) {
  233. return url_general (name, URL_STANDARD);
  234. }
  235. url
  236. url_standard (string dir, string name) {
  237. return url_standard (dir) * url_standard (name);
  238. }
  239. url::url (const char* name): rep (tm_new<url_rep> (url_unix (name)->t)) {}
  240. url::url (string name): rep (tm_new<url_rep> (url_unix (name)->t)) {}
  241. url::url (string path_name, string name):
  242. rep (tm_new<url_rep> (url_unix (path_name, name)->t)) {}
  243. /******************************************************************************
  244. * Computational url constructors
  245. ******************************************************************************/
  246. static bool
  247. is_semi_root (url u) {
  248. // url u such that u/.. == u (website or windows drive name)
  249. #ifdef WINPATHS
  250. return is_concat (u) && is_root (u[1]) && is_atomic (u[2]);
  251. #else
  252. return is_concat (u) && is_root_web (u[1]) && is_atomic (u[2]);
  253. #endif
  254. }
  255. url
  256. operator * (url u1, url u2) {
  257. //cout << "concat " << u1->t << " * " << u2->t << "\n";
  258. if (is_root (u2) || (is_concat (u2) && is_root (u2[1]))) {
  259. if (is_concat (u1) && is_root_web (u1[1]) &&
  260. (is_root (u2, "default") ||
  261. (is_concat (u2) && is_root (u2[1], "default"))))
  262. {
  263. url v= u1[2];
  264. while (is_concat (v)) v= v[1];
  265. if (is_root (u2)) return u1[1] * v;
  266. return u1[1] * v * u2[2];
  267. }
  268. return u2;
  269. }
  270. if (is_here (u1) || (u1->t == "")) return u2;
  271. if (is_here (u2)) return u1;
  272. if (is_none (u1)) return url_none ();
  273. if (is_none (u2)) return url_none ();
  274. if (u2 == url_parent ()) {
  275. if (is_root (u1)) return u1;
  276. if (is_atomic (u1) && (!is_parent (u1))) return url_here ();
  277. if (is_semi_root (u1))
  278. return u1;
  279. }
  280. if (is_concat (u2) && (u2[1] == url_parent ())) {
  281. if (is_root (u1)) return u1 * u2[2];
  282. if (is_atomic (u1) && (!is_parent (u1))) return u2[2];
  283. if (is_semi_root (u1))
  284. return u1 * u2[2];
  285. }
  286. if (is_concat (u2) && (u2[1] == url_ancestor ())) {
  287. if (is_root (u1) || is_semi_root (u1)) return u1 * u2[2];
  288. return (u1 * u2[2]) | ((u1 * url_parent ()) * u2);
  289. }
  290. if (is_concat (u1)) return u1[1] * (u1[2] * u2);
  291. return as_url (tuple ("concat", u1->t, u2->t));
  292. }
  293. url
  294. operator * (url u1, const char* name) {
  295. return u1 * url (name);
  296. }
  297. url
  298. operator * (url u1, string name) {
  299. return u1 * url (name);
  300. }
  301. url
  302. operator | (url u1, url u2) {
  303. if (is_none (u1)) return u2;
  304. if (is_none (u2)) return u1;
  305. if (is_or (u1)) return u1[1] | (u1[2] | u2);
  306. if (u1 == u2) return u2;
  307. if (is_or (u2) && (u1 == u2[1])) return u2;
  308. return as_url (tuple ("or", u1->t, u2->t));
  309. }
  310. url
  311. url_wildcard () {
  312. return as_url (tuple ("wildcard"));
  313. }
  314. url
  315. url_wildcard (string name) {
  316. return as_url (tuple ("wildcard", name));
  317. }
  318. /******************************************************************************
  319. * url predicates
  320. ******************************************************************************/
  321. bool
  322. is_rooted (url u) {
  323. return
  324. is_root (u) ||
  325. (is_concat (u) && is_rooted (u[1])) ||
  326. (is_or (u) && is_rooted (u[1]) && is_rooted (u[2]));
  327. }
  328. bool
  329. is_rooted (url u, string protocol) {
  330. return
  331. is_root (u, protocol) ||
  332. (is_concat (u) && is_rooted (u[1], protocol)) ||
  333. (is_or (u) && is_rooted (u[1], protocol) && is_rooted (u[2], protocol));
  334. }
  335. bool
  336. is_rooted_web (url u) {
  337. return
  338. is_root_web (u) ||
  339. (is_concat (u) && is_rooted_web (u[1])) ||
  340. (is_or (u) && is_rooted_web (u[1]) && is_rooted_web (u[2]));
  341. }
  342. bool
  343. is_rooted_tmfs (url u) {
  344. return
  345. is_root_tmfs (u) ||
  346. (is_concat (u) && is_rooted_tmfs (u[1])) ||
  347. (is_or (u) && is_rooted_tmfs (u[1]) && is_rooted_tmfs (u[2]));
  348. }
  349. bool
  350. is_name (url u) {
  351. if (is_atomic (u)) return true;
  352. if (!is_concat (u)) return false;
  353. return is_name (u[1]) && is_name (u[2]);
  354. }
  355. bool
  356. is_rooted_name (url u) {
  357. return is_concat (u) && is_root (u[1]) && is_name (u[2]);
  358. }
  359. bool
  360. is_name_in_path (url u) {
  361. if (is_name (u)) return true;
  362. return is_concat (u) && is_root (u[1], "default") && is_name (u[2]);
  363. }
  364. bool
  365. is_path (url u) {
  366. if (is_atomic (u)) return true;
  367. if ((!is_or (u)) && (!is_concat (u))) return false;
  368. return is_path (u[1]) && is_path (u[2]);
  369. }
  370. bool
  371. is_rooted_path (url u) {
  372. return is_rooted (u) && is_path (u);
  373. }
  374. bool
  375. is_ramdisc (url u) {
  376. return is_concat (u) && is_root (u[1], "ramdisc");
  377. }
  378. /******************************************************************************
  379. * Conversion routines for urls
  380. ******************************************************************************/
  381. string
  382. as_string (url u, int type) {
  383. // This routine pritty prints an url as a string.
  384. // FIXME: the current algorithm is quadratic in time.
  385. if (is_none (u)) return "{}";
  386. if (is_atomic (u)) return u->t->label;
  387. if (is_concat (u)) {
  388. int stype= type;
  389. if (is_root (u[1]) && (!is_root (u[1], "default"))) stype= URL_STANDARD;
  390. string sep= (stype==URL_SYSTEM? string (URL_CONCATER): string ("/"));
  391. string s1 = as_string (u[1], type);
  392. string s2 = as_string (u[2], stype);
  393. if (is_root (u[1], "default")) s1= "";
  394. if ((!is_name (u[1])) && (!is_root (u[1]))) s1= "{" * s1 * "}";
  395. if ((!is_concat (u[2])) && (!is_atomic (u[2])) && (!is_wildcard (u[2], 1)))
  396. s2= "{" * s2 * "}";
  397. #ifdef WINPATHS
  398. if (is_semi_root (u)) {
  399. if (ends (s2, ":")) return s2 * "\\";
  400. else return s2;
  401. }
  402. if (is_root (u[1]) && stype == URL_SYSTEM) return s2;
  403. #endif
  404. return s1 * sep * s2;
  405. }
  406. if (is_or (u)) {
  407. string s1= as_string (u[1], type);
  408. string s2= as_string (u[2], type);
  409. if (!is_name_in_path (u[1])) s1= "{" * s1 * "}";
  410. if ((!is_or (u[2])) && (!is_name_in_path (u[2]))) s2= "{" * s2 * "}";
  411. #ifdef WINPATHS
  412. if (type == URL_STANDARD) return s1 * ":" * s2;
  413. else return s1 * string (URL_SEPARATOR) * s2;
  414. #else
  415. return s1 * string (URL_SEPARATOR) * s2;
  416. #endif
  417. }
  418. #ifdef WINPATHS
  419. if (is_root (u, "default")) {
  420. int stype= type;
  421. if (is_root (u[1]) && (!is_root (u[1], "default"))) stype= URL_STANDARD;
  422. if (stype == URL_SYSTEM) return ""; else return "/";
  423. }
  424. #else
  425. if (is_root (u, "default")) return "/";
  426. #endif
  427. if (is_root (u, "file")) return u[1]->t->label * "://";
  428. if (is_root (u)) return u[1]->t->label * ":/";
  429. if (is_wildcard (u, 0)) return "**";
  430. if (is_wildcard (u, 1)) return u->t[1]->label;
  431. FAILED ("bad url");
  432. return "";
  433. }
  434. tm_ostream&
  435. operator << (tm_ostream& out, url u) {
  436. return out << as_string (u, URL_SYSTEM);
  437. }
  438. /******************************************************************************
  439. * Operations on urls
  440. ******************************************************************************/
  441. url
  442. head (url u) {
  443. return u * url_parent ();
  444. }
  445. url
  446. tail (url u) {
  447. if (is_concat (u)) {
  448. if (is_root_web (u[1]) && is_atomic (u[2])) return url_here ();
  449. return tail (u[2]);
  450. }
  451. if (is_or (u)) return tail (u[1]) | tail (u[2]);
  452. if (is_root (u)) return url_here ();
  453. return u;
  454. }
  455. string
  456. suffix (url u) {
  457. u= tail (u);
  458. if (!is_atomic (u)) return "";
  459. string s= as_string (u);
  460. int i, n= N(s);
  461. for (i=n-1; i>=0; i--)
  462. if (s[i]=='.') break;
  463. if ((i>0) && (i<n-1)) {
  464. string r= s (i+1, n);
  465. while ((N(r)>0) && (r[N(r)-1]=='~' || r[N(r)-1]=='#')) r= r(0, N(r)-1);
  466. return r;
  467. }
  468. return "";
  469. }
  470. url
  471. glue (url u, string s) {
  472. if (is_atomic (u)) return as_url (tree (u->t->label * s));
  473. if (is_concat (u)) return u[1] * glue (u[2], s);
  474. if (is_or (u)) return glue (u[1], s) | glue (u[2], s);
  475. cerr << "\nu= " << u << "\n";
  476. cerr << "s= " << s << "\n";
  477. FAILED ("can't glue string to url");
  478. return u;
  479. }
  480. url
  481. unglue (url u, int nr) {
  482. if (is_atomic (u))
  483. return as_url (tree (u->t->label (0, N(u->t->label) - nr)));
  484. if (is_concat (u)) return u[1] * unglue (u[2], nr);
  485. if (is_or (u)) return unglue (u[1], nr) | unglue (u[2], nr);
  486. cerr << "\nu= " << u << "\n";
  487. cerr << "nr= " << nr << "\n";
  488. FAILED ("can't unglue from url");
  489. return u;
  490. }
  491. url
  492. unblank (url u) {
  493. if (is_concat (u) && (u[2]->t == "")) return u[1];
  494. if (is_concat (u)) return u[1] * unblank (u[2]);
  495. if (is_or (u)) return unblank (u[1]) | unblank (u[2]);
  496. return u;
  497. }
  498. url
  499. relative (url base, url u) {
  500. return head (base) * u;
  501. }
  502. url
  503. delta_sub (url base, url u) {
  504. #ifdef WINPATHS
  505. if (is_atomic (base) || heuristic_is_default (as_string (base), URL_SYSTEM))
  506. return u;
  507. #else
  508. if (is_atomic (base))
  509. return u;
  510. #endif
  511. if (is_concat (base) && is_concat (u) && (base[1] == u[1]))
  512. return delta_sub (base[2], u[2]);
  513. if (is_concat (base))
  514. return url_parent () * delta_sub (head (base), u);
  515. return url_none ();
  516. }
  517. url
  518. delta (url base, url u) {
  519. if (is_or (u))
  520. return delta (base, u[1]) | delta (base, u[2]);
  521. url res= delta_sub (base, u);
  522. if (is_none (res)) return u;
  523. return res;
  524. }
  525. static url
  526. expand (url u1, url u2) {
  527. if (is_or (u1)) return expand (u1[1], u2) | expand (u1[2], u2);
  528. if (is_or (u2)) return expand (u1, u2[1]) | expand (u1, u2[2]);
  529. return u1 * u2;
  530. }
  531. url
  532. expand (url u) {
  533. if (is_or (u)) return expand (u[1]) | expand (u[2]);
  534. if (is_concat (u)) return expand (expand (u[1]), expand (u[2]));
  535. return u;
  536. }
  537. bool
  538. descends (url u, url base) {
  539. if (is_or (base)) return descends (u, base[1]) || descends (u, base[2]);
  540. if (is_concat (u) && is_atomic (base))
  541. return u[1] == base;
  542. if (is_concat (u) && is_concat (base))
  543. return u[1] == base[1] && descends (u[2], base[2]);
  544. return false;
  545. }
  546. bool
  547. is_secure (url u) {
  548. return descends (u, expand (url_path ("$TEXMACS_SECURE_PATH")));
  549. }
  550. /******************************************************************************
  551. * Url sorting and factorization
  552. ******************************************************************************/
  553. static bool
  554. operator <= (url u1, url u2) {
  555. if (is_atomic (u1) && is_atomic (u2))
  556. return u1->t->label <= u2->t->label;
  557. if (is_atomic (u1)) return true;
  558. if (is_atomic (u2)) return false;
  559. if (is_concat (u1) && is_concat (u2)) {
  560. if (u1[1] == u2[1]) return u1[2] <= u2[2];
  561. else return u1[1] <= u2[1];
  562. }
  563. if (is_concat (u1)) return true;
  564. if (is_concat (u2)) return false;
  565. return true; // does not matter for sorting
  566. }
  567. static url
  568. sort_sub (url add, url to) {
  569. if (is_or (to)) {
  570. if (add <= to[1]) return add | to;
  571. return to[1] | sort_sub (add, to[2]);
  572. }
  573. if (add <= to) return add | to;
  574. else return to | add;
  575. }
  576. url
  577. sort (url u) {
  578. if (is_or (u))
  579. return sort_sub (u[1], sort (u[2]));
  580. else return u;
  581. }
  582. static url
  583. factor_sorted (url u) {
  584. if (!is_or (u)) return u;
  585. url v= factor_sorted (u[2]);
  586. if (is_concat (u[1])) {
  587. if (is_concat (v) && (u[1][1] == v[1]))
  588. return u[1][1] * (u[1][2] | v[2]);
  589. if (is_or (v) && is_concat (v[1]) && (u[1][1] == v[1][1]))
  590. return (u[1][1] * (u[1][2] | v[1][2])) | v[2];
  591. }
  592. return u[1] | v;
  593. }
  594. static url
  595. factor_sub (url u) {
  596. if (is_concat (u)) return u[1] * factor (u[2]);
  597. if (is_or (u)) return factor_sub (u[1]) | factor_sub (u[2]);
  598. return u;
  599. }
  600. url
  601. factor (url u) {
  602. return factor_sub (factor_sorted (sort (u)));
  603. }
  604. /******************************************************************************
  605. * Url resolution and wildcard expansion
  606. ******************************************************************************/
  607. url complete (url base, url u, string filter, bool flag);
  608. url
  609. reroot (url u, string protocol) {
  610. if (is_concat (u)) return reroot (u[1], protocol) * u[2];
  611. if (is_or (u)) return reroot (u[1], protocol) | reroot (u[2], protocol);
  612. if (is_root (u)) return url_root (protocol);
  613. return u;
  614. }
  615. static url
  616. complete (url base, url sub, url u, string filter, bool flag) {
  617. if (is_or (sub)) {
  618. url res1= complete (base, sub[1], u, filter, flag);
  619. if ((!is_none (res1)) && flag) return res1;
  620. return res1 | complete (base, sub[2], u, filter, flag);
  621. }
  622. if (is_concat (sub) && is_rooted (sub[1])) {
  623. url res= complete (sub[1], sub[2], u, filter, flag);
  624. return sub[1] * res;
  625. }
  626. return sub * complete (base * sub, u, filter, flag);
  627. }
  628. url
  629. complete (url base, url u, string filter, bool flag) {
  630. // cout << "complete " << base << " |||| " << u << LF;
  631. if (is_none (base)) return base;
  632. if (is_none (u)) return u;
  633. if ((!is_root (base)) && (!is_rooted_name (base))) {
  634. cerr << "base= " << base << LF;
  635. FAILED ("invalid base url");
  636. }
  637. if (is_name (u) || (is_concat (u) && is_root (u[1]) && is_name (u[2]))) {
  638. url comp= base * u;
  639. if (is_rooted (comp, "default") || is_rooted (comp, "file")) {
  640. if (is_of_type (comp, filter)) return reroot (u, "default");
  641. return url_none ();
  642. }
  643. if (is_rooted_web (comp) || is_rooted_tmfs (comp) || is_ramdisc (comp)) {
  644. if (is_of_type (comp, filter)) return u;
  645. return url_none ();
  646. }
  647. cerr << LF << "base= " << base << LF;
  648. ASSERT (is_rooted (comp), "unrooted url");
  649. FAILED ("bad protocol in url");
  650. }
  651. if (is_root (u)) {
  652. // FIXME: test filter flags here
  653. return u;
  654. }
  655. if (is_concat (u)) {
  656. url sub= complete (base, u[1], "", false);
  657. // "" should often be faster than the more correct "d" here
  658. return complete (base, sub, u[2], filter, flag);
  659. }
  660. if (is_or (u)) {
  661. url res1= complete (base, u[1], filter, flag);
  662. if ((!is_none (res1)) && flag) return res1;
  663. return res1 | complete (base, u[2], filter, flag);
  664. }
  665. if (is_wildcard (u)) {
  666. // FIXME: ret= ret | ... is unefficient (quadratic) in main loop
  667. if (!(is_rooted (base, "default") || is_rooted (base, "file"))) {
  668. cerr << LF << "base= " << base << LF;
  669. FAILED ("wildcards only implemented for files");
  670. }
  671. url ret= url_none ();
  672. if (is_wildcard (u, 0) && is_of_type (base, filter)) ret= url_here ();
  673. bool error_flag;
  674. array<string> dir= read_directory (base, error_flag);
  675. int i, n= N(dir);
  676. for (i=0; i<n; i++) {
  677. if ((!is_none (ret)) && flag) return ret;
  678. if ((dir[i] == ".") || (dir[i] == "..")) continue;
  679. if (is_wildcard (u, 0))
  680. ret= ret | (dir[i] * complete (base * dir[i], u, filter, flag));
  681. else if (match_wildcard (dir[i], u[1]->t->label))
  682. ret= ret | complete (base, dir[i], filter, flag);
  683. }
  684. return ret;
  685. }
  686. cout << LF << "url= " << u << LF;
  687. FAILED ("bad url");
  688. return u;
  689. }
  690. url
  691. complete (url u, string filter, bool flag) {
  692. url home= url_pwd ();
  693. return home * complete (home, u, filter, flag);
  694. }
  695. url
  696. complete (url u, string filter) {
  697. // This routine can be used in order to find all possible matches
  698. // for the wildcards in an url and replace the wildcards by these matches.
  699. // Moreover, matches are normalized (file root -> default root).
  700. return complete (u, filter, false);
  701. }
  702. url
  703. resolve (url u, string filter) {
  704. // This routine does the same thing as complete, but it stops at
  705. // the first match. It is particularly useful for finding files in paths.
  706. return complete (u, filter, true);
  707. /*
  708. url res= complete (u, filter, true);
  709. if (is_none (res))
  710. cout << "Failed resolution of " << u << ", " << filter << LF;
  711. return res;
  712. */
  713. }
  714. url
  715. resolve_in_path (url u) {
  716. if (use_which) {
  717. string name = escape_sh (as_string (u));
  718. string which= var_eval_system ("which " * name * " 2> /dev/null");
  719. if (ends (which, name))
  720. return which;
  721. else if ((which != "") &&
  722. (!starts (which, "which: ")) &&
  723. (!starts (which, "no ")))
  724. cout << "TeXmacs] " << which << "\n";
  725. }
  726. return resolve (url_path ("$PATH") * u, "x");
  727. }
  728. bool
  729. exists (url u) {
  730. return !is_none (resolve (u, "r"));
  731. }
  732. bool
  733. exists_in_path (url u) {
  734. #if defined (OS_WIN32) || defined (__MINGW__) || defined (__MINGW32__)
  735. return !is_none (resolve_in_path (url (as_string (u) * ".exe")));
  736. #else
  737. return !is_none (resolve_in_path (u));
  738. #endif
  739. }
  740. bool
  741. has_permission (url u, string filter) {
  742. return !is_none (resolve (u, filter));
  743. }
  744. static url
  745. descendance_sub (url u) {
  746. if (is_or (u))
  747. return descendance_sub (u[1]) | descendance_sub (u[2]);
  748. return complete (u, url_wildcard (), "r", false);
  749. }
  750. url
  751. descendance (url u) {
  752. // Utility for style and package menus in tm_server.cpp
  753. // Compute and merge subdirectories of directories in path
  754. return factor (descendance_sub (u));
  755. }
  756. /******************************************************************************
  757. * Concretization of resolved urls
  758. ******************************************************************************/
  759. string
  760. concretize (url u) {
  761. // This routine transforms a resolved url into a system file name.
  762. // In the case of distant files from the web, a local copy is created.
  763. #ifdef WINPATHS
  764. // FIXME: this fix seems strange;
  765. // to start with, the if condition is not respected
  766. string s = as_string (u);
  767. if (starts (s, "file:///")) s= s (8, N(s));
  768. if (heuristic_is_default (s, 0)) return s;
  769. #else
  770. if (is_rooted (u, "default") || is_rooted (u, "file"))
  771. return as_string (reroot (u, "default"));
  772. #endif
  773. if (is_rooted_web (u)) return concretize (get_from_web (u));
  774. if (is_rooted_tmfs (u)) return concretize (get_from_server (u));
  775. if (is_ramdisc (u)) return concretize (get_from_ramdisc (u));
  776. if (is_here (u)) return as_string (url_pwd ());
  777. if (is_parent (u)) return as_string (url_pwd () * url_parent ());
  778. if (is_wildcard (u, 1)) return u->t[1]->label;
  779. cerr << "TeXmacs] couldn't concretize " << u->t << LF;
  780. // cerr << "\nu= " << u << LF;
  781. // FAILED ("url has no root");
  782. return "xxx";
  783. }
  784. string
  785. materialize (url u, string filter) {
  786. // Combines resolve and concretize
  787. url r= resolve (u, filter);
  788. if (!(is_rooted (r) || is_here (r) || is_parent (r))) {
  789. cerr << "\nu= " << u << LF;
  790. FAILED ("url could not be resolved");
  791. }
  792. return concretize (r);
  793. }