/src/fileio.c

https://bitbucket.org/ultra_iter/vim-qt · C · 10387 lines · 8914 code · 369 blank · 1104 comment · 1570 complexity · 7577f997c8f865403772e706d7db9d28 MD5 · raw file

Large files are truncated click here to view the full file

  1. /* vi:set ts=8 sts=4 sw=4:
  2. *
  3. * VIM - Vi IMproved by Bram Moolenaar
  4. *
  5. * Do ":help uganda" in Vim to read copying and usage conditions.
  6. * Do ":help credits" in Vim to see a list of people who contributed.
  7. * See README.txt for an overview of the Vim source code.
  8. */
  9. /*
  10. * fileio.c: read from and write to a file
  11. */
  12. #include "vim.h"
  13. #if defined(__TANDEM) || defined(__MINT__)
  14. # include <limits.h> /* for SSIZE_MAX */
  15. #endif
  16. #if defined(HAVE_UTIME) && defined(HAVE_UTIME_H)
  17. # include <utime.h> /* for struct utimbuf */
  18. #endif
  19. #define BUFSIZE 8192 /* size of normal write buffer */
  20. #define SMBUFSIZE 256 /* size of emergency write buffer */
  21. #ifdef FEAT_CRYPT
  22. /* One magic string per crypt method: crypt_magic[0] is pkzip crypt, crypt_magic[1] is sha2+blowfish */
  23. static char *crypt_magic[] = {"VimCrypt~01!", "VimCrypt~02!"};
  24. static char crypt_magic_head[] = "VimCrypt~"; /* leading part common to all crypt_magic[] entries */
  25. # define CRYPT_MAGIC_LEN 12 /* length of each crypt_magic[] string; must be multiple of 4! */
  26. /* For blowfish, after the magic header, we store 8 bytes of salt and then 8
  27. * bytes of seed (initialisation vector). The two arrays below have one
  * entry per crypt_magic[] entry, in the same order: 0 for pkzip, 8 for
  * blowfish. */
  28. static int crypt_salt_len[] = {0, 8};
  29. static int crypt_seed_len[] = {0, 8};
  30. #define CRYPT_SALT_LEN_MAX 8 /* largest value in crypt_salt_len[] */
  31. #define CRYPT_SEED_LEN_MAX 8 /* largest value in crypt_seed_len[] */
  32. #endif
  33. /* Is there any system that doesn't have access()? */
  34. #define USE_MCH_ACCESS
  35. #if defined(sun) && defined(S_ISCHR)
  36. # define OPEN_CHR_FILES
  37. static int is_dev_fd_file(char_u *fname);
  38. #endif
  39. #ifdef FEAT_MBYTE
  40. static char_u *next_fenc __ARGS((char_u **pp));
  41. # ifdef FEAT_EVAL
  42. static char_u *readfile_charconvert __ARGS((char_u *fname, char_u *fenc, int *fdp));
  43. # endif
  44. #endif
  45. #ifdef FEAT_VIMINFO
  46. static void check_marks_read __ARGS((void));
  47. #endif
  48. #ifdef FEAT_CRYPT
  49. static int crypt_method_from_magic __ARGS((char *ptr, int len));
  50. static char_u *check_for_cryptkey __ARGS((char_u *cryptkey, char_u *ptr, long *sizep, off_t *filesizep, int newfile, char_u *fname, int *did_ask));
  51. #endif
  52. #ifdef UNIX
  53. static void set_file_time __ARGS((char_u *fname, time_t atime, time_t mtime));
  54. #endif
  55. static int set_rw_fname __ARGS((char_u *fname, char_u *sfname));
  56. static int msg_add_fileformat __ARGS((int eol_type));
  57. static void msg_add_eol __ARGS((void));
  58. static int check_mtime __ARGS((buf_T *buf, struct stat *s));
  59. static int time_differs __ARGS((long t1, long t2));
  60. #ifdef FEAT_AUTOCMD
  61. static int apply_autocmds_exarg __ARGS((event_T event, char_u *fname, char_u *fname_io, int force, buf_T *buf, exarg_T *eap));
  62. static int au_find_group __ARGS((char_u *name));
  63. # define AUGROUP_DEFAULT -1 /* default autocmd group */
  64. # define AUGROUP_ERROR -2 /* erroneous autocmd group */
  65. # define AUGROUP_ALL -3 /* all autocmd groups */
  66. #endif
  67. #if defined(FEAT_CRYPT) || defined(FEAT_MBYTE)
  /* Bit flags describing how bytes are converted/encrypted when doing file
   * I/O.  They are stored in "bw_flags" of struct bw_info and in "fio_flags"
   * of readfile(). */
  68. # define HAS_BW_FLAGS /* struct bw_info has a "bw_flags" member */
  69. # define FIO_LATIN1 0x01 /* convert Latin1 */
  70. # define FIO_UTF8 0x02 /* convert UTF-8 */
  71. # define FIO_UCS2 0x04 /* convert UCS-2 */
  72. # define FIO_UCS4 0x08 /* convert UCS-4 */
  73. # define FIO_UTF16 0x10 /* convert UTF-16 */
  74. # ifdef WIN3264
  75. # define FIO_CODEPAGE 0x20 /* convert MS-Windows codepage */
  76. # define FIO_PUT_CP(x) (((x) & 0xffff) << 16) /* put codepage in top word */
  77. # define FIO_GET_CP(x) (((x)>>16) & 0xffff) /* get codepage from top word */
  78. # endif
  79. # ifdef MACOS_X
  80. # define FIO_MACROMAN 0x20 /* convert MacRoman; same value as FIO_CODEPAGE, which is only defined for WIN3264 */
  81. # endif
  82. # define FIO_ENDIAN_L 0x80 /* little endian */
  83. # define FIO_ENCRYPTED 0x1000 /* encrypt written bytes */
  84. # define FIO_NOCONVERT 0x2000 /* skip encoding conversion */
  85. # define FIO_UCSBOM 0x4000 /* check for BOM at start of file */
  86. # define FIO_ALL -1 /* allow all formats (all bits set) */
  87. #endif
  88. /* When converting, a read() or write() may leave some bytes to be converted
  89. * for the next call. The value is guessed... */
  90. #define CONV_RESTLEN 30
  91. /* We have to guess how much a sequence of bytes may expand when converting
  92. * with iconv() to be able to allocate a buffer. */
  93. #define ICONV_MULT 8
  94. /*
  95. * Structure to pass arguments from buf_write() to buf_write_bytes().
  * Besides the descriptor and the data to write it holds, depending on the
  * enabled features, the FIO_ flags and the state of the encoding
  * conversion, including bytes that could not be converted yet.
  96. */
  97. struct bw_info
  98. {
  99. int bw_fd; /* file descriptor */
  100. char_u *bw_buf; /* buffer with data to be written */
  101. int bw_len; /* length of data */
  102. #ifdef HAS_BW_FLAGS
  103. int bw_flags; /* FIO_ flags; only with FEAT_CRYPT or FEAT_MBYTE */
  104. #endif
  105. #ifdef FEAT_MBYTE
  106. char_u bw_rest[CONV_RESTLEN]; /* not converted bytes, left over for the next call */
  107. int bw_restlen; /* nr of bytes in bw_rest[] */
  108. int bw_first; /* first write call */
  109. char_u *bw_conv_buf; /* buffer for writing converted chars */
  110. int bw_conv_buflen; /* size of bw_conv_buf */
  111. int bw_conv_error; /* set for conversion error */
  112. linenr_T bw_conv_error_lnum; /* first line with error or zero */
  113. linenr_T bw_start_lnum; /* line number at start of buffer */
  114. # ifdef USE_ICONV
  115. iconv_t bw_iconv_fd; /* descriptor for iconv() or -1 */
  116. # endif
  117. #endif
  118. };
  119. static int buf_write_bytes __ARGS((struct bw_info *ip));
  120. #ifdef FEAT_MBYTE
  121. static linenr_T readfile_linenr __ARGS((linenr_T linecnt, char_u *p, char_u *endp));
  122. static int ucs2bytes __ARGS((unsigned c, char_u **pp, int flags));
  123. static int need_conversion __ARGS((char_u *fenc));
  124. static int get_fio_flags __ARGS((char_u *ptr));
  125. static char_u *check_for_bom __ARGS((char_u *p, long size, int *lenp, int flags));
  126. static int make_bom __ARGS((char_u *buf, char_u *name));
  127. # ifdef WIN3264
  128. static int get_win_fio_flags __ARGS((char_u *ptr));
  129. # endif
  130. # ifdef MACOS_X
  131. static int get_mac_fio_flags __ARGS((char_u *ptr));
  132. # endif
  133. #endif
  134. static int move_lines __ARGS((buf_T *frombuf, buf_T *tobuf));
  135. #ifdef TEMPDIRNAMES
  136. static void vim_settempdir __ARGS((char_u *tempdir));
  137. #endif
  138. #ifdef FEAT_AUTOCMD
  139. static char *e_auchangedbuf = N_("E812: Autocommands changed buffer or buffer name");
  140. #endif
  141. void
  142. filemess(buf, name, s, attr)
  143. buf_T *buf;
  144. char_u *name;
  145. char_u *s;
  146. int attr;
  147. {
  148. int msg_scroll_save;
  149. if (msg_silent != 0)
  150. return;
  151. msg_add_fname(buf, name); /* put file name in IObuff with quotes */
  152. /* If it's extremely long, truncate it. */
  153. if (STRLEN(IObuff) > IOSIZE - 80)
  154. IObuff[IOSIZE - 80] = NUL;
  155. STRCAT(IObuff, s);
  156. /*
  157. * For the first message may have to start a new line.
  158. * For further ones overwrite the previous one, reset msg_scroll before
  159. * calling filemess().
  160. */
  161. msg_scroll_save = msg_scroll;
  162. if (shortmess(SHM_OVERALL) && !exiting && p_verbose == 0)
  163. msg_scroll = FALSE;
  164. if (!msg_scroll) /* wait a bit when overwriting an error msg */
  165. check_for_delay(FALSE);
  166. msg_start();
  167. msg_scroll = msg_scroll_save;
  168. msg_scrolled_ign = TRUE;
  169. /* may truncate the message to avoid a hit-return prompt */
  170. msg_outtrans_attr(msg_may_trunc(FALSE, IObuff), attr);
  171. msg_clr_eos();
  172. out_flush();
  173. msg_scrolled_ign = FALSE;
  174. }
  175. /*
  176. * Read lines from file "fname" into the buffer after line "from".
  177. *
  178. * 1. We allocate blocks with lalloc, as big as possible.
  179. * 2. Each block is filled with characters from the file with a single read().
  180. * 3. The lines are inserted in the buffer with ml_append().
  181. *
  182. * (caller must check that fname != NULL, unless READ_STDIN is used)
  183. *
  184. * "lines_to_skip" is the number of lines that must be skipped
  185. * "lines_to_read" is the number of lines that are appended
  186. * When not recovering lines_to_skip is 0 and lines_to_read MAXLNUM.
  187. *
  188. * flags:
  189. * READ_NEW starting to edit a new buffer
  190. * READ_FILTER reading filter output
  191. * READ_STDIN read from stdin instead of a file
  192. * READ_BUFFER read from curbuf instead of a file (converting after reading
  193. * stdin)
  194. * READ_DUMMY read into a dummy buffer (to check if file contents changed)
  195. * READ_KEEP_UNDO don't clear undo info or read it from a file
  196. *
  197. * return FAIL for failure, OK otherwise
  198. */
  199. int
  200. readfile(fname, sfname, from, lines_to_skip, lines_to_read, eap, flags)
  201. char_u *fname;
  202. char_u *sfname;
  203. linenr_T from;
  204. linenr_T lines_to_skip;
  205. linenr_T lines_to_read;
  206. exarg_T *eap; /* can be NULL! */
  207. int flags;
  208. {
  209. int fd = 0;
  210. int newfile = (flags & READ_NEW);
  211. int check_readonly;
  212. int filtering = (flags & READ_FILTER);
  213. int read_stdin = (flags & READ_STDIN);
  214. int read_buffer = (flags & READ_BUFFER);
  215. int set_options = newfile || read_buffer
  216. || (eap != NULL && eap->read_edit);
  217. linenr_T read_buf_lnum = 1; /* next line to read from curbuf */
  218. colnr_T read_buf_col = 0; /* next char to read from this line */
  219. char_u c;
  220. linenr_T lnum = from;
  221. char_u *ptr = NULL; /* pointer into read buffer */
  222. char_u *buffer = NULL; /* read buffer */
  223. char_u *new_buffer = NULL; /* init to shut up gcc */
  224. char_u *line_start = NULL; /* init to shut up gcc */
  225. int wasempty; /* buffer was empty before reading */
  226. colnr_T len;
  227. long size = 0;
  228. char_u *p;
  229. off_t filesize = 0;
  230. int skip_read = FALSE;
  231. #ifdef FEAT_CRYPT
  232. char_u *cryptkey = NULL;
  233. int did_ask_for_key = FALSE;
  234. int crypt_method_used;
  235. #endif
  236. #ifdef FEAT_PERSISTENT_UNDO
  237. context_sha256_T sha_ctx;
  238. int read_undo_file = FALSE;
  239. #endif
  240. int split = 0; /* number of split lines */
  241. #define UNKNOWN 0x0fffffff /* file size is unknown */
  242. linenr_T linecnt;
  243. int error = FALSE; /* errors encountered */
  244. int ff_error = EOL_UNKNOWN; /* file format with errors */
  245. long linerest = 0; /* remaining chars in line */
  246. #ifdef UNIX
  247. int perm = 0;
  248. int swap_mode = -1; /* protection bits for swap file */
  249. #else
  250. int perm;
  251. #endif
  252. int fileformat = 0; /* end-of-line format */
  253. int keep_fileformat = FALSE;
  254. struct stat st;
  255. int file_readonly;
  256. linenr_T skip_count = 0;
  257. linenr_T read_count = 0;
  258. int msg_save = msg_scroll;
  259. linenr_T read_no_eol_lnum = 0; /* non-zero lnum when last line of
  260. * last read was missing the eol */
  261. int try_mac = (vim_strchr(p_ffs, 'm') != NULL);
  262. int try_dos = (vim_strchr(p_ffs, 'd') != NULL);
  263. int try_unix = (vim_strchr(p_ffs, 'x') != NULL);
  264. int file_rewind = FALSE;
  265. #ifdef FEAT_MBYTE
  266. int can_retry;
  267. linenr_T conv_error = 0; /* line nr with conversion error */
  268. linenr_T illegal_byte = 0; /* line nr with illegal byte */
  269. int keep_dest_enc = FALSE; /* don't retry when char doesn't fit
  270. in destination encoding */
  271. int bad_char_behavior = BAD_REPLACE;
  272. /* BAD_KEEP, BAD_DROP or character to
  273. * replace with */
  274. char_u *tmpname = NULL; /* name of 'charconvert' output file */
  275. int fio_flags = 0;
  276. char_u *fenc; /* fileencoding to use */
  277. int fenc_alloced; /* fenc_next is in allocated memory */
  278. char_u *fenc_next = NULL; /* next item in 'fencs' or NULL */
  279. int advance_fenc = FALSE;
  280. long real_size = 0;
  281. # ifdef USE_ICONV
  282. iconv_t iconv_fd = (iconv_t)-1; /* descriptor for iconv() or -1 */
  283. # ifdef FEAT_EVAL
  284. int did_iconv = FALSE; /* TRUE when iconv() failed and trying
  285. 'charconvert' next */
  286. # endif
  287. # endif
  288. int converted = FALSE; /* TRUE if conversion done */
  289. int notconverted = FALSE; /* TRUE if conversion wanted but it
  290. wasn't possible */
  291. char_u conv_rest[CONV_RESTLEN];
  292. int conv_restlen = 0; /* nr of bytes in conv_rest[] */
  293. #endif
  294. #ifdef FEAT_AUTOCMD
  295. buf_T *old_curbuf;
  296. char_u *old_b_ffname;
  297. char_u *old_b_fname;
  298. int using_b_ffname;
  299. int using_b_fname;
  300. #endif
  301. curbuf->b_no_eol_lnum = 0; /* in case it was set by the previous read */
  302. /*
  303. * If there is no file name yet, use the one for the read file.
  304. * BF_NOTEDITED is set to reflect this.
  305. * Don't do this for a read from a filter.
  306. * Only do this when 'cpoptions' contains the 'f' flag.
  307. */
  308. if (curbuf->b_ffname == NULL
  309. && !filtering
  310. && fname != NULL
  311. && vim_strchr(p_cpo, CPO_FNAMER) != NULL
  312. && !(flags & READ_DUMMY))
  313. {
  314. if (set_rw_fname(fname, sfname) == FAIL)
  315. return FAIL;
  316. }
  317. #ifdef FEAT_AUTOCMD
  318. /* Remember the initial values of curbuf, curbuf->b_ffname and
  319. * curbuf->b_fname to detect whether they are altered as a result of
  320. * executing nasty autocommands. Also check if "fname" and "sfname"
  321. * point to one of these values. */
  322. old_curbuf = curbuf;
  323. old_b_ffname = curbuf->b_ffname;
  324. old_b_fname = curbuf->b_fname;
  325. using_b_ffname = (fname == curbuf->b_ffname)
  326. || (sfname == curbuf->b_ffname);
  327. using_b_fname = (fname == curbuf->b_fname) || (sfname == curbuf->b_fname);
  328. #endif
  329. /* After reading a file the cursor line changes but we don't want to
  330. * display the line. */
  331. ex_no_reprint = TRUE;
  332. /* don't display the file info for another buffer now */
  333. need_fileinfo = FALSE;
  334. /*
  335. * For Unix: Use the short file name whenever possible.
  336. * Avoids problems with networks and when directory names are changed.
  337. * Don't do this for MS-DOS, a "cd" in a sub-shell may have moved us to
  338. * another directory, which we don't detect.
  339. */
  340. if (sfname == NULL)
  341. sfname = fname;
  342. #if defined(UNIX) || defined(__EMX__)
  343. fname = sfname;
  344. #endif
  345. #ifdef FEAT_AUTOCMD
  346. /*
  347. * The BufReadCmd and FileReadCmd events intercept the reading process by
  348. * executing the associated commands instead.
  349. */
  350. if (!filtering && !read_stdin && !read_buffer)
  351. {
  352. pos_T pos;
  353. pos = curbuf->b_op_start;
  354. /* Set '[ mark to the line above where the lines go (line 1 if zero). */
  355. curbuf->b_op_start.lnum = ((from == 0) ? 1 : from);
  356. curbuf->b_op_start.col = 0;
  357. if (newfile)
  358. {
  359. if (apply_autocmds_exarg(EVENT_BUFREADCMD, NULL, sfname,
  360. FALSE, curbuf, eap))
  361. #ifdef FEAT_EVAL
  362. return aborting() ? FAIL : OK;
  363. #else
  364. return OK;
  365. #endif
  366. }
  367. else if (apply_autocmds_exarg(EVENT_FILEREADCMD, sfname, sfname,
  368. FALSE, NULL, eap))
  369. #ifdef FEAT_EVAL
  370. return aborting() ? FAIL : OK;
  371. #else
  372. return OK;
  373. #endif
  374. curbuf->b_op_start = pos;
  375. }
  376. #endif
  377. if ((shortmess(SHM_OVER) || curbuf->b_help) && p_verbose == 0)
  378. msg_scroll = FALSE; /* overwrite previous file message */
  379. else
  380. msg_scroll = TRUE; /* don't overwrite previous file message */
  381. /*
  382. * If the name ends in a path separator, we can't open it. Check here,
  383. * because reading the file may actually work, but then creating the swap
  384. * file may destroy it! Reported on MS-DOS and Win 95.
  385. * If the name is too long we might crash further on, quit here.
  386. */
  387. if (fname != NULL && *fname != NUL)
  388. {
  389. p = fname + STRLEN(fname);
  390. if (after_pathsep(fname, p) || STRLEN(fname) >= MAXPATHL)
  391. {
  392. filemess(curbuf, fname, (char_u *)_("Illegal file name"), 0);
  393. msg_end();
  394. msg_scroll = msg_save;
  395. return FAIL;
  396. }
  397. }
  398. #ifdef UNIX
  399. /*
  400. * On Unix it is possible to read a directory, so we have to
  401. * check for it before the mch_open().
  402. */
  403. if (!read_stdin && !read_buffer)
  404. {
  405. perm = mch_getperm(fname);
  406. if (perm >= 0 && !S_ISREG(perm) /* not a regular file ... */
  407. # ifdef S_ISFIFO
  408. && !S_ISFIFO(perm) /* ... or fifo */
  409. # endif
  410. # ifdef S_ISSOCK
  411. && !S_ISSOCK(perm) /* ... or socket */
  412. # endif
  413. # ifdef OPEN_CHR_FILES
  414. && !(S_ISCHR(perm) && is_dev_fd_file(fname))
  415. /* ... or a character special file named /dev/fd/<n> */
  416. # endif
  417. )
  418. {
  419. if (S_ISDIR(perm))
  420. filemess(curbuf, fname, (char_u *)_("is a directory"), 0);
  421. else
  422. filemess(curbuf, fname, (char_u *)_("is not a file"), 0);
  423. msg_end();
  424. msg_scroll = msg_save;
  425. return FAIL;
  426. }
  427. # if defined(MSDOS) || defined(MSWIN) || defined(OS2)
  428. /*
  429. * MS-Windows allows opening a device, but we will probably get stuck
  430. * trying to read it.
  431. */
  432. if (!p_odev && mch_nodetype(fname) == NODE_WRITABLE)
  433. {
  434. filemess(curbuf, fname, (char_u *)_("is a device (disabled with 'opendevice' option)"), 0);
  435. msg_end();
  436. msg_scroll = msg_save;
  437. return FAIL;
  438. }
  439. # endif
  440. }
  441. #endif
  442. /* set default 'fileformat' */
  443. if (set_options)
  444. {
  445. if (eap != NULL && eap->force_ff != 0)
  446. set_fileformat(get_fileformat_force(curbuf, eap), OPT_LOCAL);
  447. else if (*p_ffs != NUL)
  448. set_fileformat(default_fileformat(), OPT_LOCAL);
  449. }
  450. /* set or reset 'binary' */
  451. if (eap != NULL && eap->force_bin != 0)
  452. {
  453. int oldval = curbuf->b_p_bin;
  454. curbuf->b_p_bin = (eap->force_bin == FORCE_BIN);
  455. set_options_bin(oldval, curbuf->b_p_bin, OPT_LOCAL);
  456. }
  457. /*
  458. * When opening a new file we take the readonly flag from the file.
  459. * Default is r/w, can be set to r/o below.
  460. * Don't reset it when in readonly mode
  461. * Only set/reset b_p_ro when BF_CHECK_RO is set.
  462. */
  463. check_readonly = (newfile && (curbuf->b_flags & BF_CHECK_RO));
  464. if (check_readonly && !readonlymode)
  465. curbuf->b_p_ro = FALSE;
  466. if (newfile && !read_stdin && !read_buffer)
  467. {
  468. /* Remember time of file. */
  469. if (mch_stat((char *)fname, &st) >= 0)
  470. {
  471. buf_store_time(curbuf, &st, fname);
  472. curbuf->b_mtime_read = curbuf->b_mtime;
  473. #ifdef UNIX
  474. /*
  475. * Use the protection bits of the original file for the swap file.
  476. * This makes it possible for others to read the name of the
  477. * edited file from the swapfile, but only if they can read the
  478. * edited file.
  479. * Remove the "write" and "execute" bits for group and others
  480. * (they must not write the swapfile).
  481. * Add the "read" and "write" bits for the user, otherwise we may
  482. * not be able to write to the file ourselves.
  483. * Setting the bits is done below, after creating the swap file.
  484. */
  485. swap_mode = (st.st_mode & 0644) | 0600;
  486. #endif
  487. #ifdef FEAT_CW_EDITOR
  488. /* Get the FSSpec on MacOS
  489. * TODO: Update it properly when the buffer name changes
  490. */
  491. (void)GetFSSpecFromPath(curbuf->b_ffname, &curbuf->b_FSSpec);
  492. #endif
  493. #ifdef VMS
  494. curbuf->b_fab_rfm = st.st_fab_rfm;
  495. curbuf->b_fab_rat = st.st_fab_rat;
  496. curbuf->b_fab_mrs = st.st_fab_mrs;
  497. #endif
  498. }
  499. else
  500. {
  501. curbuf->b_mtime = 0;
  502. curbuf->b_mtime_read = 0;
  503. curbuf->b_orig_size = 0;
  504. curbuf->b_orig_mode = 0;
  505. }
  506. /* Reset the "new file" flag. It will be set again below when the
  507. * file doesn't exist. */
  508. curbuf->b_flags &= ~(BF_NEW | BF_NEW_W);
  509. }
  510. /*
  511. * for UNIX: check readonly with perm and mch_access()
  512. * for MSDOS and Amiga: check readonly by trying to open the file for writing
  513. */
  514. file_readonly = FALSE;
  515. if (read_stdin)
  516. {
  517. #if defined(MSDOS) || defined(MSWIN) || defined(OS2)
  518. /* Force binary I/O on stdin to avoid CR-LF -> LF conversion. */
  519. setmode(0, O_BINARY);
  520. #endif
  521. }
  522. else if (!read_buffer)
  523. {
  524. #ifdef USE_MCH_ACCESS
  525. if (
  526. # ifdef UNIX
  527. !(perm & 0222) ||
  528. # endif
  529. mch_access((char *)fname, W_OK))
  530. file_readonly = TRUE;
  531. fd = mch_open((char *)fname, O_RDONLY | O_EXTRA, 0);
  532. #else
  533. if (!newfile
  534. || readonlymode
  535. || (fd = mch_open((char *)fname, O_RDWR | O_EXTRA, 0)) < 0)
  536. {
  537. file_readonly = TRUE;
  538. /* try to open ro */
  539. fd = mch_open((char *)fname, O_RDONLY | O_EXTRA, 0);
  540. }
  541. #endif
  542. }
  543. if (fd < 0) /* cannot open at all */
  544. {
  545. #ifndef UNIX
  546. int isdir_f;
  547. #endif
  548. msg_scroll = msg_save;
  549. #ifndef UNIX
  550. /*
  551. * On MSDOS and Amiga we can't open a directory, check here.
  552. */
  553. isdir_f = (mch_isdir(fname));
  554. perm = mch_getperm(fname); /* check if the file exists */
  555. if (isdir_f)
  556. {
  557. filemess(curbuf, sfname, (char_u *)_("is a directory"), 0);
  558. curbuf->b_p_ro = TRUE; /* must use "w!" now */
  559. }
  560. else
  561. #endif
  562. if (newfile)
  563. {
  564. if (perm < 0
  565. #ifdef ENOENT
  566. && errno == ENOENT
  567. #endif
  568. )
  569. {
  570. /*
  571. * Set the 'new-file' flag, so that when the file has
  572. * been created by someone else, a ":w" will complain.
  573. */
  574. curbuf->b_flags |= BF_NEW;
  575. /* Create a swap file now, so that other Vims are warned
  576. * that we are editing this file. Don't do this for a
  577. * "nofile" or "nowrite" buffer type. */
  578. #ifdef FEAT_QUICKFIX
  579. if (!bt_dontwrite(curbuf))
  580. #endif
  581. {
  582. check_need_swap(newfile);
  583. #ifdef FEAT_AUTOCMD
  584. /* SwapExists autocommand may mess things up */
  585. if (curbuf != old_curbuf
  586. || (using_b_ffname
  587. && (old_b_ffname != curbuf->b_ffname))
  588. || (using_b_fname
  589. && (old_b_fname != curbuf->b_fname)))
  590. {
  591. EMSG(_(e_auchangedbuf));
  592. return FAIL;
  593. }
  594. #endif
  595. }
  596. if (dir_of_file_exists(fname))
  597. filemess(curbuf, sfname, (char_u *)_("[New File]"), 0);
  598. else
  599. filemess(curbuf, sfname,
  600. (char_u *)_("[New DIRECTORY]"), 0);
  601. #ifdef FEAT_VIMINFO
  602. /* Even though this is a new file, it might have been
  603. * edited before and deleted. Get the old marks. */
  604. check_marks_read();
  605. #endif
  606. #ifdef FEAT_MBYTE
  607. if (eap != NULL && eap->force_enc != 0)
  608. {
  609. /* set forced 'fileencoding' */
  610. fenc = enc_canonize(eap->cmd + eap->force_enc);
  611. if (fenc != NULL)
  612. set_string_option_direct((char_u *)"fenc", -1,
  613. fenc, OPT_FREE|OPT_LOCAL, 0);
  614. vim_free(fenc);
  615. }
  616. #endif
  617. #ifdef FEAT_AUTOCMD
  618. apply_autocmds_exarg(EVENT_BUFNEWFILE, sfname, sfname,
  619. FALSE, curbuf, eap);
  620. #endif
  621. /* remember the current fileformat */
  622. save_file_ff(curbuf);
  623. #if defined(FEAT_AUTOCMD) && defined(FEAT_EVAL)
  624. if (aborting()) /* autocmds may abort script processing */
  625. return FAIL;
  626. #endif
  627. return OK; /* a new file is not an error */
  628. }
  629. else
  630. {
  631. filemess(curbuf, sfname, (char_u *)(
  632. # ifdef EFBIG
  633. (errno == EFBIG) ? _("[File too big]") :
  634. # endif
  635. # ifdef EOVERFLOW
  636. (errno == EOVERFLOW) ? _("[File too big]") :
  637. # endif
  638. _("[Permission Denied]")), 0);
  639. curbuf->b_p_ro = TRUE; /* must use "w!" now */
  640. }
  641. }
  642. return FAIL;
  643. }
  644. /*
  645. * Only set the 'ro' flag for readonly files the first time they are
  646. * loaded. Help files always get readonly mode
  647. */
  648. if ((check_readonly && file_readonly) || curbuf->b_help)
  649. curbuf->b_p_ro = TRUE;
  650. if (set_options)
  651. {
  652. /* Don't change 'eol' if reading from buffer as it will already be
  653. * correctly set when reading stdin. */
  654. if (!read_buffer)
  655. {
  656. curbuf->b_p_eol = TRUE;
  657. curbuf->b_start_eol = TRUE;
  658. }
  659. #ifdef FEAT_MBYTE
  660. curbuf->b_p_bomb = FALSE;
  661. curbuf->b_start_bomb = FALSE;
  662. #endif
  663. }
  664. /* Create a swap file now, so that other Vims are warned that we are
  665. * editing this file.
  666. * Don't do this for a "nofile" or "nowrite" buffer type. */
  667. #ifdef FEAT_QUICKFIX
  668. if (!bt_dontwrite(curbuf))
  669. #endif
  670. {
  671. check_need_swap(newfile);
  672. #ifdef FEAT_AUTOCMD
  673. if (!read_stdin && (curbuf != old_curbuf
  674. || (using_b_ffname && (old_b_ffname != curbuf->b_ffname))
  675. || (using_b_fname && (old_b_fname != curbuf->b_fname))))
  676. {
  677. EMSG(_(e_auchangedbuf));
  678. if (!read_buffer)
  679. close(fd);
  680. return FAIL;
  681. }
  682. #endif
  683. #ifdef UNIX
  684. /* Set swap file protection bits after creating it. */
  685. if (swap_mode > 0 && curbuf->b_ml.ml_mfp != NULL
  686. && curbuf->b_ml.ml_mfp->mf_fname != NULL)
  687. (void)mch_setperm(curbuf->b_ml.ml_mfp->mf_fname, (long)swap_mode);
  688. #endif
  689. }
  690. #if defined(HAS_SWAP_EXISTS_ACTION)
  691. /* If "Quit" selected at ATTENTION dialog, don't load the file */
  692. if (swap_exists_action == SEA_QUIT)
  693. {
  694. if (!read_buffer && !read_stdin)
  695. close(fd);
  696. return FAIL;
  697. }
  698. #endif
  699. ++no_wait_return; /* don't wait for return yet */
  700. /*
  701. * Set '[ mark to the line above where the lines go (line 1 if zero).
  702. */
  703. curbuf->b_op_start.lnum = ((from == 0) ? 1 : from);
  704. curbuf->b_op_start.col = 0;
  705. #ifdef FEAT_AUTOCMD
  706. if (!read_buffer)
  707. {
  708. int m = msg_scroll;
  709. int n = msg_scrolled;
  710. /*
  711. * The file must be closed again, the autocommands may want to change
  712. * the file before reading it.
  713. */
  714. if (!read_stdin)
  715. close(fd); /* ignore errors */
  716. /*
  717. * The output from the autocommands should not overwrite anything and
  718. * should not be overwritten: Set msg_scroll, restore its value if no
  719. * output was done.
  720. */
  721. msg_scroll = TRUE;
  722. if (filtering)
  723. apply_autocmds_exarg(EVENT_FILTERREADPRE, NULL, sfname,
  724. FALSE, curbuf, eap);
  725. else if (read_stdin)
  726. apply_autocmds_exarg(EVENT_STDINREADPRE, NULL, sfname,
  727. FALSE, curbuf, eap);
  728. else if (newfile)
  729. apply_autocmds_exarg(EVENT_BUFREADPRE, NULL, sfname,
  730. FALSE, curbuf, eap);
  731. else
  732. apply_autocmds_exarg(EVENT_FILEREADPRE, sfname, sfname,
  733. FALSE, NULL, eap);
  734. if (msg_scrolled == n)
  735. msg_scroll = m;
  736. #ifdef FEAT_EVAL
  737. if (aborting()) /* autocmds may abort script processing */
  738. {
  739. --no_wait_return;
  740. msg_scroll = msg_save;
  741. curbuf->b_p_ro = TRUE; /* must use "w!" now */
  742. return FAIL;
  743. }
  744. #endif
  745. /*
  746. * Don't allow the autocommands to change the current buffer.
  747. * Try to re-open the file.
  748. *
  749. * Don't allow the autocommands to change the buffer name either
  750. * (cd for example) if it invalidates fname or sfname.
  751. */
  752. if (!read_stdin && (curbuf != old_curbuf
  753. || (using_b_ffname && (old_b_ffname != curbuf->b_ffname))
  754. || (using_b_fname && (old_b_fname != curbuf->b_fname))
  755. || (fd = mch_open((char *)fname, O_RDONLY | O_EXTRA, 0)) < 0))
  756. {
  757. --no_wait_return;
  758. msg_scroll = msg_save;
  759. if (fd < 0)
  760. EMSG(_("E200: *ReadPre autocommands made the file unreadable"));
  761. else
  762. EMSG(_("E201: *ReadPre autocommands must not change current buffer"));
  763. curbuf->b_p_ro = TRUE; /* must use "w!" now */
  764. return FAIL;
  765. }
  766. }
  767. #endif /* FEAT_AUTOCMD */
  768. /* Autocommands may add lines to the file, need to check if it is empty */
  769. wasempty = (curbuf->b_ml.ml_flags & ML_EMPTY);
  770. if (!recoverymode && !filtering && !(flags & READ_DUMMY))
  771. {
  772. /*
  773. * Show the user that we are busy reading the input. Sometimes this
  774. * may take a while. When reading from stdin another program may
  775. * still be running, don't move the cursor to the last line, unless
  776. * always using the GUI.
  777. */
  778. if (read_stdin)
  779. {
  780. #ifndef ALWAYS_USE_GUI
  781. mch_msg(_("Vim: Reading from stdin...\n"));
  782. #endif
  783. #ifdef FEAT_GUI
  784. /* Also write a message in the GUI window, if there is one. */
  785. if (gui.in_use && !gui.dying && !gui.starting)
  786. {
  787. p = (char_u *)_("Reading from stdin...");
  788. gui_write(p, (int)STRLEN(p));
  789. }
  790. #endif
  791. }
  792. else if (!read_buffer)
  793. filemess(curbuf, sfname, (char_u *)"", 0);
  794. }
  795. msg_scroll = FALSE; /* overwrite the file message */
  796. /*
  797. * Set linecnt now, before the "retry" caused by a wrong guess for
  798. * fileformat, and after the autocommands, which may change them.
  799. */
  800. linecnt = curbuf->b_ml.ml_line_count;
  801. #ifdef FEAT_MBYTE
  802. /* "++bad=" argument. */
  803. if (eap != NULL && eap->bad_char != 0)
  804. {
  805. bad_char_behavior = eap->bad_char;
  806. if (set_options)
  807. curbuf->b_bad_char = eap->bad_char;
  808. }
  809. else
  810. curbuf->b_bad_char = 0;
  811. /*
  812. * Decide which 'encoding' to use or use first.
  813. */
  814. if (eap != NULL && eap->force_enc != 0)
  815. {
  816. fenc = enc_canonize(eap->cmd + eap->force_enc);
  817. fenc_alloced = TRUE;
  818. keep_dest_enc = TRUE;
  819. }
  820. else if (curbuf->b_p_bin)
  821. {
  822. fenc = (char_u *)""; /* binary: don't convert */
  823. fenc_alloced = FALSE;
  824. }
  825. else if (curbuf->b_help)
  826. {
  827. char_u firstline[80];
  828. int fc;
  829. /* Help files are either utf-8 or latin1. Try utf-8 first, if this
  830. * fails it must be latin1.
  831. * Always do this when 'encoding' is "utf-8". Otherwise only do
  832. * this when needed to avoid [converted] remarks all the time.
  833. * It is needed when the first line contains non-ASCII characters.
  834. * That is only in *.??x files. */
  835. fenc = (char_u *)"latin1";
  836. c = enc_utf8;
  837. if (!c && !read_stdin)
  838. {
  839. fc = fname[STRLEN(fname) - 1];
  840. if (TOLOWER_ASC(fc) == 'x')
  841. {
  842. /* Read the first line (and a bit more). Immediately rewind to
  843. * the start of the file. If the read() fails "len" is -1. */
  844. len = read_eintr(fd, firstline, 80);
  845. lseek(fd, (off_t)0L, SEEK_SET);
  846. for (p = firstline; p < firstline + len; ++p)
  847. if (*p >= 0x80)
  848. {
  849. c = TRUE;
  850. break;
  851. }
  852. }
  853. }
  854. if (c)
  855. {
  856. fenc_next = fenc;
  857. fenc = (char_u *)"utf-8";
  858. /* When the file is utf-8 but a character doesn't fit in
  859. * 'encoding' don't retry. In help text editing utf-8 bytes
  860. * doesn't make sense. */
  861. if (!enc_utf8)
  862. keep_dest_enc = TRUE;
  863. }
  864. fenc_alloced = FALSE;
  865. }
  866. else if (*p_fencs == NUL)
  867. {
  868. fenc = curbuf->b_p_fenc; /* use format from buffer */
  869. fenc_alloced = FALSE;
  870. }
  871. else
  872. {
  873. fenc_next = p_fencs; /* try items in 'fileencodings' */
  874. fenc = next_fenc(&fenc_next);
  875. fenc_alloced = TRUE;
  876. }
  877. #endif
  878. /*
  879. * Jump back here to retry reading the file in different ways.
  880. * Reasons to retry:
  881. * - encoding conversion failed: try another one from "fenc_next"
  882. * - BOM detected and fenc was set, need to setup conversion
  883. * - "fileformat" check failed: try another
  884. *
  885. * Variables set for special retry actions:
  886. * "file_rewind" Rewind the file to start reading it again.
  887. * "advance_fenc" Advance "fenc" using "fenc_next".
  888. * "skip_read" Re-use already read bytes (BOM detected).
  889. * "did_iconv" iconv() conversion failed, try 'charconvert'.
  890. * "keep_fileformat" Don't reset "fileformat".
  891. *
  892. * Other status indicators:
  893. * "tmpname" When != NULL did conversion with 'charconvert'.
  894. * Output file has to be deleted afterwards.
  895. * "iconv_fd" When != -1 did conversion with iconv().
  896. */
  897. retry:
  898. if (file_rewind)
  899. {
  900. if (read_buffer)
  901. {
  902. read_buf_lnum = 1;
  903. read_buf_col = 0;
  904. }
  905. else if (read_stdin || lseek(fd, (off_t)0L, SEEK_SET) != 0)
  906. {
  907. /* Can't rewind the file, give up. */
  908. error = TRUE;
  909. goto failed;
  910. }
  911. /* Delete the previously read lines. */
  912. while (lnum > from)
  913. ml_delete(lnum--, FALSE);
  914. file_rewind = FALSE;
  915. #ifdef FEAT_MBYTE
  916. if (set_options)
  917. {
  918. curbuf->b_p_bomb = FALSE;
  919. curbuf->b_start_bomb = FALSE;
  920. }
  921. conv_error = 0;
  922. #endif
  923. }
  924. #ifdef FEAT_CRYPT
  925. if (cryptkey != NULL)
  926. /* Need to reset the state, but keep the key, don't want to ask for it
  927. * again. */
  928. crypt_pop_state();
  929. #endif
  930. /*
  931. * The first time, and when retrying with another "fenc", "fileformat"
  932. * will be reset.
  933. */
  934. if (keep_fileformat)
  935. keep_fileformat = FALSE;
  936. else
  937. {
  938. if (eap != NULL && eap->force_ff != 0)
  939. {
  940. fileformat = get_fileformat_force(curbuf, eap);
  941. try_unix = try_dos = try_mac = FALSE;
  942. }
  943. else if (curbuf->b_p_bin)
  944. fileformat = EOL_UNIX; /* binary: use Unix format */
  945. else if (*p_ffs == NUL)
  946. fileformat = get_fileformat(curbuf);/* use format from buffer */
  947. else
  948. fileformat = EOL_UNKNOWN; /* detect from file */
  949. }
  950. #ifdef FEAT_MBYTE
  951. # ifdef USE_ICONV
  952. if (iconv_fd != (iconv_t)-1)
  953. {
  954. /* aborted conversion with iconv(), close the descriptor */
  955. iconv_close(iconv_fd);
  956. iconv_fd = (iconv_t)-1;
  957. }
  958. # endif
  959. if (advance_fenc)
  960. {
  961. /*
  962. * Try the next entry in 'fileencodings'.
  963. */
  964. advance_fenc = FALSE;
  965. if (eap != NULL && eap->force_enc != 0)
  966. {
  967. /* Conversion given with "++cc=" wasn't possible, read
  968. * without conversion. */
  969. notconverted = TRUE;
  970. conv_error = 0;
  971. if (fenc_alloced)
  972. vim_free(fenc);
  973. fenc = (char_u *)"";
  974. fenc_alloced = FALSE;
  975. }
  976. else
  977. {
  978. if (fenc_alloced)
  979. vim_free(fenc);
  980. if (fenc_next != NULL)
  981. {
  982. fenc = next_fenc(&fenc_next);
  983. fenc_alloced = (fenc_next != NULL);
  984. }
  985. else
  986. {
  987. fenc = (char_u *)"";
  988. fenc_alloced = FALSE;
  989. }
  990. }
  991. if (tmpname != NULL)
  992. {
  993. mch_remove(tmpname); /* delete converted file */
  994. vim_free(tmpname);
  995. tmpname = NULL;
  996. }
  997. }
  998. /*
  999. * Conversion may be required when the encoding of the file is different
  1000. * from 'encoding' or 'encoding' is UTF-16, UCS-2 or UCS-4.
  1001. */
  1002. fio_flags = 0;
  1003. converted = need_conversion(fenc);
  1004. if (converted)
  1005. {
  1006. /* "ucs-bom" means we need to check the first bytes of the file
  1007. * for a BOM. */
  1008. if (STRCMP(fenc, ENC_UCSBOM) == 0)
  1009. fio_flags = FIO_UCSBOM;
  1010. /*
  1011. * Check if UCS-2/4 or Latin1 to UTF-8 conversion needs to be
  1012. * done. This is handled below after read(). Prepare the
  1013. * fio_flags to avoid having to parse the string each time.
  1014. * Also check for Unicode to Latin1 conversion, because iconv()
  1015. * appears not to handle this correctly. This works just like
  1016. * conversion to UTF-8 except how the resulting character is put in
  1017. * the buffer.
  1018. */
  1019. else if (enc_utf8 || STRCMP(p_enc, "latin1") == 0)
  1020. fio_flags = get_fio_flags(fenc);
  1021. # ifdef WIN3264
  1022. /*
  1023. * Conversion from an MS-Windows codepage to UTF-8 or another codepage
  1024. * is handled with MultiByteToWideChar().
  1025. */
  1026. if (fio_flags == 0)
  1027. fio_flags = get_win_fio_flags(fenc);
  1028. # endif
  1029. # ifdef MACOS_X
  1030. /* Conversion from Apple MacRoman to latin1 or UTF-8 */
  1031. if (fio_flags == 0)
  1032. fio_flags = get_mac_fio_flags(fenc);
  1033. # endif
  1034. # ifdef USE_ICONV
  1035. /*
  1036. * Try using iconv() if we can't convert internally.
  1037. */
  1038. if (fio_flags == 0
  1039. # ifdef FEAT_EVAL
  1040. && !did_iconv
  1041. # endif
  1042. )
  1043. iconv_fd = (iconv_t)my_iconv_open(
  1044. enc_utf8 ? (char_u *)"utf-8" : p_enc, fenc);
  1045. # endif
  1046. # ifdef FEAT_EVAL
  1047. /*
  1048. * Use the 'charconvert' expression when conversion is required
  1049. * and we can't do it internally or with iconv().
  1050. */
  1051. if (fio_flags == 0 && !read_stdin && !read_buffer && *p_ccv != NUL
  1052. # ifdef USE_ICONV
  1053. && iconv_fd == (iconv_t)-1
  1054. # endif
  1055. )
  1056. {
  1057. # ifdef USE_ICONV
  1058. did_iconv = FALSE;
  1059. # endif
  1060. /* Skip conversion when it's already done (retry for wrong
  1061. * "fileformat"). */
  1062. if (tmpname == NULL)
  1063. {
  1064. tmpname = readfile_charconvert(fname, fenc, &fd);
  1065. if (tmpname == NULL)
  1066. {
  1067. /* Conversion failed. Try another one. */
  1068. advance_fenc = TRUE;
  1069. if (fd < 0)
  1070. {
  1071. /* Re-opening the original file failed! */
  1072. EMSG(_("E202: Conversion made file unreadable!"));
  1073. error = TRUE;
  1074. goto failed;
  1075. }
  1076. goto retry;
  1077. }
  1078. }
  1079. }
  1080. else
  1081. # endif
  1082. {
  1083. if (fio_flags == 0
  1084. # ifdef USE_ICONV
  1085. && iconv_fd == (iconv_t)-1
  1086. # endif
  1087. )
  1088. {
  1089. /* Conversion wanted but we can't.
  1090. * Try the next conversion in 'fileencodings' */
  1091. advance_fenc = TRUE;
  1092. goto retry;
  1093. }
  1094. }
  1095. }
  1096. /* Set "can_retry" when it's possible to rewind the file and try with
  1097. * another "fenc" value. It's FALSE when no other "fenc" to try, reading
  1098. * stdin or fixed at a specific encoding. */
  1099. can_retry = (*fenc != NUL && !read_stdin && !keep_dest_enc);
  1100. #endif
  1101. if (!skip_read)
  1102. {
  1103. linerest = 0;
  1104. filesize = 0;
  1105. skip_count = lines_to_skip;
  1106. read_count = lines_to_read;
  1107. #ifdef FEAT_MBYTE
  1108. conv_restlen = 0;
  1109. #endif
  1110. #ifdef FEAT_PERSISTENT_UNDO
  1111. read_undo_file = (newfile && (flags & READ_KEEP_UNDO) == 0
  1112. && curbuf->b_ffname != NULL
  1113. && curbuf->b_p_udf
  1114. && !filtering
  1115. && !read_stdin
  1116. && !read_buffer);
  1117. if (read_undo_file)
  1118. sha256_start(&sha_ctx);
  1119. #endif
  1120. }
  1121. while (!error && !got_int)
  1122. {
  1123. /*
  1124. * We allocate as much space for the file as we can get, plus
  1125. * space for the old line plus room for one terminating NUL.
  1126. * The amount is limited by the fact that read() only can read
  1127. * up to max_unsigned characters (and other things).
  1128. */
  1129. #if SIZEOF_INT <= 2
  1130. if (linerest >= 0x7ff0)
  1131. {
  1132. ++split;
  1133. *ptr = NL; /* split line by inserting a NL */
  1134. size = 1;
  1135. }
  1136. else
  1137. #endif
  1138. {
  1139. if (!skip_read)
  1140. {
  1141. #if SIZEOF_INT > 2
  1142. # if defined(SSIZE_MAX) && (SSIZE_MAX < 0x10000L)
  1143. size = SSIZE_MAX; /* use max I/O size, 52K */
  1144. # else
  1145. size = 0x10000L; /* use buffer >= 64K */
  1146. # endif
  1147. #else
  1148. size = 0x7ff0L - linerest; /* limit buffer to 32K */
  1149. #endif
  1150. for ( ; size >= 10; size = (long)((long_u)size >> 1))
  1151. {
  1152. if ((new_buffer = lalloc((long_u)(size + linerest + 1),
  1153. FALSE)) != NULL)
  1154. break;
  1155. }
  1156. if (new_buffer == NULL)
  1157. {
  1158. do_outofmem_msg((long_u)(size * 2 + linerest + 1));
  1159. error = TRUE;
  1160. break;
  1161. }
  1162. if (linerest) /* copy characters from the previous buffer */
  1163. mch_memmove(new_buffer, ptr - linerest, (size_t)linerest);
  1164. vim_free(buffer);
  1165. buffer = new_buffer;
  1166. ptr = buffer + linerest;
  1167. line_start = buffer;
  1168. #ifdef FEAT_MBYTE
  1169. /* May need room to translate into.
  1170. * For iconv() we don't really know the required space, use a
  1171. * factor ICONV_MULT.
  1172. * latin1 to utf-8: 1 byte becomes up to 2 bytes
  1173. * utf-16 to utf-8: 2 bytes become up to 3 bytes, 4 bytes
  1174. * become up to 4 bytes, size must be multiple of 2
  1175. * ucs-2 to utf-8: 2 bytes become up to 3 bytes, size must be
  1176. * multiple of 2
  1177. * ucs-4 to utf-8: 4 bytes become up to 6 bytes, size must be
  1178. * multiple of 4 */
  1179. real_size = (int)size;
  1180. # ifdef USE_ICONV
  1181. if (iconv_fd != (iconv_t)-1)
  1182. size = size / ICONV_MULT;
  1183. else
  1184. # endif
  1185. if (fio_flags & FIO_LATIN1)
  1186. size = size / 2;
  1187. else if (fio_flags & (FIO_UCS2 | FIO_UTF16))
  1188. size = (size * 2 / 3) & ~1;
  1189. else if (fio_flags & FIO_UCS4)
  1190. size = (size * 2 / 3) & ~3;
  1191. else if (fio_flags == FIO_UCSBOM)
  1192. size = size / ICONV_MULT; /* worst case */
  1193. # ifdef WIN3264
  1194. else if (fio_flags & FIO_CODEPAGE)
  1195. size = size / ICONV_MULT; /* also worst case */
  1196. # endif
  1197. # ifdef MACOS_X
  1198. else if (fio_flags & FIO_MACROMAN)
  1199. size = size / ICONV_MULT; /* also worst case */
  1200. # endif
  1201. #endif
  1202. #ifdef FEAT_MBYTE
  1203. if (conv_restlen > 0)
  1204. {
  1205. /* Insert unconverted bytes from previous line. */
  1206. mch_memmove(ptr, conv_rest, conv_restlen);
  1207. ptr += conv_restlen;
  1208. size -= conv_restlen;
  1209. }
  1210. #endif
  1211. if (read_buffer)
  1212. {
  1213. /*
  1214. * Read bytes from curbuf. Used for converting text read
  1215. * from stdin.
  1216. */
  1217. if (read_buf_lnum > from)
  1218. size = 0;
  1219. else
  1220. {
  1221. int n, ni;
  1222. long tlen;
  1223. tlen = 0;
  1224. for (;;)
  1225. {
  1226. p = ml_get(read_buf_lnum) + read_buf_col;
  1227. n = (int)STRLEN(p);
  1228. if ((int)tlen + n + 1 > size)
  1229. {
  1230. /* Filled up to "size", append partial line.
  1231. * Change NL to NUL to reverse the effect done
  1232. * below. */
  1233. n = (int)(size - tlen);
  1234. for (ni = 0; ni < n; ++ni)
  1235. {
  1236. if (p[ni] == NL)
  1237. ptr[tlen++] = NUL;
  1238. else
  1239. ptr[tlen++] = p[ni];
  1240. }
  1241. read_buf_col += n;
  1242. break;
  1243. }
  1244. else
  1245. {
  1246. /* Append whole line and new-line. Change NL
  1247. * to NUL to reverse the effect done below. */
  1248. for (ni = 0; ni < n; ++ni)
  1249. {
  1250. if (p[ni] == NL)
  1251. ptr[tlen++] = NUL;
  1252. else
  1253. ptr[tlen++] = p[ni];
  1254. }
  1255. ptr[tlen++] = NL;
  1256. read_buf_col = 0;
  1257. if (++read_buf_lnum > from)
  1258. {
  1259. /* When the last line didn't have an
  1260. * end-of-line don't add it now either. */
  1261. if (!curbuf->b_p_eol)
  1262. --tlen;
  1263. size = tlen;
  1264. break;
  1265. }
  1266. }
  1267. }
  1268. }
  1269. }
  1270. else
  1271. {
  1272. /*
  1273. * Read bytes from the file.
  1274. */
  1275. size = read_eintr(fd, ptr, size);
  1276. }
  1277. if (size <= 0)
  1278. {
  1279. if (size < 0) /* read error */
  1280. error = TRUE;
  1281. #ifdef FEAT_MBYTE
  1282. else if (conv_restlen > 0)
  1283. {
  1284. /*
  1285. * Reached end-of-file but some trailing bytes could
  1286. * not be converted. Truncated file?
  1287. */
  1288. /* When we did a conversion report an error. */
  1289. if (fio_flags != 0
  1290. # ifdef USE_ICONV
  1291. || iconv_fd != (iconv_t)-1
  1292. # endif
  1293. )
  1294. {
  1295. if (conv_error == 0)
  1296. conv_error = curbuf->b_ml.ml_line_count
  1297. - linecnt + 1;
  1298. }
  1299. /* Remember the first linenr with an illegal byte */
  1300. else if (illegal_byte == 0)
  1301. illegal_byte = curbuf->b_ml.ml_line_count
  1302. - linecnt + 1;
  1303. if (bad_char_behavior == BAD_DROP)
  1304. {
  1305. *(ptr - conv_restlen) = NUL;
  1306. conv_restlen = 0;
  1307. }
  1308. else
  1309. {
  1310. /* Replace the trailing bytes with the replacement
  1311. * character if we were converting; if we weren't,
  1312. * leave the UTF8 checking code to do it, as it
  1313. * works slightly differently. */
  1314. if (bad_char_behavior != BAD_KEEP && (fio_flags != 0
  1315. # ifdef USE_ICONV
  1316. || iconv_fd != (iconv_t)-1
  1317. # endif
  1318. ))
  1319. {
  1320. while (conv_restlen > 0)
  1321. {
  1322. *(--ptr) = bad_char_behavior;
  1323. --conv_restlen;
  1324. }
  1325. }
  1326. fio_flags = 0; /* don't convert this */
  1327. # ifdef USE_ICONV
  1328. if (iconv_fd != (iconv_t)-1)
  1329. {
  1330. iconv_close(iconv_fd);
  1331. iconv_fd = (iconv_t)-1;
  1332. }
  1333. # endif
  1334. }
  1335. }
  1336. #endif
  1337. }
  1338. #ifdef FEAT_CRYPT
  1339. /*
  1340. * At start of file: Check for magic number of encryption.
  1341. */
  1342. if (filesize == 0)
  1343. cryptkey = check_for_cryptkey(cryptkey, ptr, &size,
  1344. &filesize, newfile, sfname,
  1345. &did_ask_for_key);
  1346. /*
  1347. * Decrypt the read bytes.
  1348. */
  1349. if (cryptkey != NULL && size > 0)
  1350. crypt_decode(ptr, size);
  1351. #endif
  1352. }
  1353. skip_read = FALSE;
  1354. #ifdef FEAT_MBYTE
  1355. /*
  1356. * At start of file (or after crypt magic number): Check for BOM.
  1357. * Also check for a BOM for other Unicode encodings, but not after
  1358. * converting with 'charconvert' or when a BOM has already been
  1359. * found.
  1360. */
  1361. if ((filesize == 0
  1362. # ifdef FEAT_CRYPT
  1363. || (filesize == (CRYPT_MAGIC_LEN
  1364. + crypt_salt_len[use_crypt_method]
  1365. + crypt_seed_len[use_crypt_method])
  1366. && cryptkey != NULL)
  1367. # endif
  1368. )
  1369. && (fio_flags == FIO_UCSBOM
  1370. || (!curbuf->b_p_bomb
  1371. && tmpname == NULL
  1372. && (*fenc == 'u' || (*fenc == NUL && enc_utf8)))))
  1373. {
  1374. char_u *ccname;
  1375. int blen;
  1376. /* no BOM detection in a short file or in binary mode */
  1377. if (size < 2 || curbuf->b_p_bin)
  1378. ccname = NULL;
  1379. else
  1380. ccname = check_for_bom(ptr, size, &blen,
  1381. fio_flags == FIO_UCSBOM ? FIO_ALL : get_fio_flags(fenc));
  1382. if (ccname != NULL)
  1383. {
  1384. /* Remove BOM from the text */
  1385. filesize += blen;
  1386. size -= blen;
  1387. mch_memmove(ptr, ptr + blen, (size_t)size);
  1388. if (set_options)
  1389. {
  1390. curbuf->b_p_bomb = TRUE;
  1391. curbuf->b_start_bomb = TRUE;
  1392. }
  1393. }
  1394. if (fio_flags == FIO_UCSBOM)
  1395. {
  1396. if (ccname == NULL)
  1397. {
  1398. /* No BOM detected: retry with next encoding. */
  1399. advance_fenc = TRUE;
  1400. }
  1401. else
  1402. {
  1403. /* BOM detected: set "fenc" and jump back */
  1404. if (fenc_alloced)
  1405. vim_free(fenc);
  1406. fenc = ccname;
  1407. fenc_alloced = FALSE;
  1408. }
  1409. /* retry reading without getting new bytes or rewinding */
  1410. skip_read = TRUE;
  1411. goto retry;
  1412. }
  1413. }
  1414. /* Include not converted bytes. */
  1415. ptr -= conv_restlen;
  1416. size += conv_restlen;
  1417. conv_restlen = 0;
  1418. #endif
  1419. /*
  1420. * Break here for a read error or end-of-file.
  1421. */
  1422. if (size <= 0)
  1423. break;
  1424. #ifdef FEAT_MBYTE
  1425. # ifdef USE_ICONV
  1426. if (iconv_fd != (iconv_t)-1)
  1427. {
  1428. /*
  1429. * Attempt conversion of the read bytes to 'encoding' using
  1430. * iconv().
  1431. */
  1432. const char *fromp;
  1433. char *top;
  1434. size_t from_size;
  1435. size_t to_size;
  1436. fromp = (char *)ptr;
  1437. from_size = size;
  1438. ptr += size;
  1439. top = (char *)ptr;
  1440. to_size = real_size - size;
  1441. /*
  1442. * If there is conversion error or not enough room try using
  1443. * another conversion. Except for when there is no
  1444. * alternative (help files).
  1445. */
  1446. while ((iconv(iconv_fd, (void *)&fromp, &from_size,
  1447. &top, &to_size)
  1448. == (size_t)-1 && ICONV_ERRNO != ICONV_EINVAL)
  1449. || from_size > CONV_RESTLEN)
  1450. {
  1451. if (can_retry)
  1452. goto rewind_retry;
  1453. if (conv_error == 0)
  1454. conv_error = readfile_linenr(linecnt,
  1455. ptr, (char_u *)top);
  1456. /* Deal with a bad byte and continue with the next. */
  1457. ++fromp;
  1458. --from_size;
  1459. if (bad_char_behavior == BAD_KEEP)
  1460. {
  1461. *top++ = *(fromp - 1);
  1462. --to_size;
  1463. }
  1464. else if (bad_char_behavior != BAD_DROP)
  1465. {
  1466. *top++ = bad_char_behavior;
  1467. --to_size;
  1468. }
  1469. }
  1470. if (from_size > 0)
  1471. {
  1472. /* Some remaining characters, keep them for the next
  1473. * round. */
  1474. mch_memmove(conv_rest, (char_u *)fromp, from_size);
  1475. conv_restlen = (int)from_size;
  1476. }
  1477. /* move the linerest to before the converted characters */
  1478. line_start = ptr - linerest;
  1479. mch_memmove(line_start, buffer, (size_t)linerest);
  1480. size = (long)((char_u *)top - ptr);
  1481. }
  1482. # endif
  1483. # ifdef WIN3264
  1484. if (fio_flags & FIO_CODEPAGE)
  1485. {
  1486. char_u *src, *dst;
  1487. WCHAR ucs2buf[3];
  1488. int ucs2len;
  1489. int codepage = FIO_GET_CP(fio_flags);
  1490. int bytelen;
  1491. int found_bad;
  1492. char replstr[2];
  1493. /*
  1494. * Conversion from an MS-Windows codepage or UTF-8 to UTF-8 or
  1495. * a codepage, using standard MS-Windows functions. This
  1496. * requires two steps:
  1497. * 1. convert from 'fileencoding' to ucs-2
  1498. * 2. convert from ucs-2 to 'encoding'
  1499. *
  1500. * Because there may be illegal bytes AND an incomplete byte
  1501. * sequence at the end, we may have to do the conversion one
  1502. * character at a time to get it right.
  1503. */
  1504. /* Replacement string for WideCharToMultiByte(). */
  1505. if (bad_char_behavior > 0)
  1506. replstr[0] = bad_char_behavior;
  1507. else
  1508. replstr[0] = '?';
  1509. replstr[1] = NUL;
  1510. /*
  1511. * Move the bytes to the end of the buffer, so that we have
  1512. * room to put the result at the start.
  1513. */
  1514. src = ptr + real_size - size;
  1515. mch_memmove(src, ptr, size);
  1516. /*
  1517. * Do the conversion.
  1518. */
  1519. dst = ptr;
  1520. size = size;
  1521. while (size > 0)
  1522. {
  1523. found_bad = FALSE;
  1524. # ifdef CP_UTF8 /* VC 4.1 doesn't define CP_UTF8 */
  1525. if (codepage == CP_UTF8)
  1526. {
  1527. /* Handle CP_UTF8 input ourselves to be able to handle
  1528. * trailing bytes properly.
  1529. * Get one UTF-8 character from src. */
  1530. bytelen = (int)utf_ptr2len_len(src, size);
  1531. if (bytelen > size)
  1532. {
  1533. /* Only got some bytes of a character. Normally
  1534. * it's put in "conv_rest", but if it's too long
  1535. * deal with it as if they were illegal bytes. */
  1536. if (bytelen <= CONV_RESTLEN)
  1537. break;
  1538. /* weird overlong byte sequence */
  1539. bytelen = size;
  1540. found_bad = TRUE;
  1541. }
  1542. else
  1543. {
  1544. int u8c = utf_ptr2char(src);
  1545. if (u8c > 0xffff || (*src >= 0x80 && bytelen == 1))
  1546. found_bad = TRUE;
  1547. ucs2buf[0] = u8c;
  1548. ucs2len = 1;
  1549. }
  1550. }
  1551. else
  1552. # endif
  1553. {
  1554. /* We don't know how long the byte sequence is, try
  1555. * from one to three bytes. */
  1556. for (bytelen = 1; bytelen <= size && bytelen <= 3;
  1557. ++bytelen)
  1558. {
  1559. ucs2len = MultiByteToWideChar(codepage,
  1560. MB_ERR_INVALID_CHARS,
  1561. (LPCSTR)src, bytelen,
  1562. ucs2buf, 3);
  1563. if (ucs2len > 0)
  1564. break;
  1565. }
  1566. if (ucs2len == 0)
  1567. {
  1568. /* If we have only one byte then it's probably an
  1569. * incomplete byte sequence. Otherwise discard
  1570. * one byte as a bad character. */
  1571. if (size == 1)
  1572. break;
  1573. found_bad = TRUE;
  1574. bytelen = 1;
  1575. }
  1576. }
  1577. if (!found_bad)
  1578. {
  1579. int i;
  1580. /* Convert "ucs2buf[ucs2len]" to 'enc' in "dst". */
  1581. if (enc_utf8)
  1582. {
  1583. /* From UCS-2 to UTF-8. Cannot fail. */
  1584. for (i = 0; i < ucs2len; ++i)
  1585. dst += utf_char2bytes(ucs2buf[i], dst);
  1586. }
  1587. else
  1588. {
  1589. BOOL bad = FALSE;
  1590. int dstlen;
  1591. /* From UCS-2 to "enc_codepage". If the
  1592. * conversion uses the default character "?",
  1593. * the data doesn't fit in this encoding. */
  1594. dstlen = WideCharToMultiByte(enc_codepage, 0,
  1595. (LPCWSTR)ucs2buf, ucs2len,
  1596. (LPSTR)dst, (int)(src - dst),
  1597. replstr, &bad);
  1598. if (bad)
  1599. found_bad = TRUE;
  1600. else
  1601. dst += dstlen;
  1602. }
  1603. }
  1604. if (found_bad)
  1605. {
  1606. /* Deal with bytes we can't convert. */
  1607. if (can_retry)
  1608. goto rewind_retry;
  1609. if (conv_error == 0)
  1610. conv_error = readfile_linenr(linecnt, ptr, dst);
  1611. if (bad_char_behavior != BAD_DROP)
  1612. {
  1613. if (bad_char_behavior == BAD_KEEP)
  1614. {
  1615. mch_memmove(dst, src, bytelen);
  1616. dst += bytelen;
  1617. }
  1618. else
  1619. *dst++ = bad_char_behavior;
  1620. }
  1621. }
  1622. src += bytelen;
  1623. size -= bytelen;
  1624. }
  1625. if (size > 0)
  1626. {
  1627. /* An incomplete byte sequence remaining. */
  1628. mch_memmove(conv_rest, src, size);
  1629. conv_restlen = size;
  1630. }
  1631. /* The new size is equal to how much "dst" was advanced. */
  1632. size = (long)(dst - ptr);
  1633. }
  1634. else
  1635. # endif
  1636. # ifdef MACOS_CONVERT
  1637. if (fio_flags & FIO_MACROMAN)
  1638. {
  1639. /*
  1640. * Conversion from Apple MacRoman char encoding to UTF-8 or
  1641. * latin1. This is in os_mac_conv.c.
  1642. */
  1643. if (macroman2enc(ptr, &size, real_size) == FAIL)
  1644. goto rewind_retry;
  1645. }
  1646. else
  1647. # endif
  1648. if (fio_flags != 0)
  1649. {
  1650. int u8c;
  1651. char_u *dest;
  1652. char_u *tail = NULL;
  1653. /*
  1654. * "enc_utf8" set: Convert Unicode or Latin1 to UTF-8.
  1655. * "enc_utf8" not set: Convert Unicode to Latin1.
  1656. * Go from end to start through the buffer, because the number
  1657. * of bytes may increase.
  1658. * "dest" points to after where the UTF-8 bytes go, "p" points
  1659. * to after the next character to convert.
  1660. */
  1661. dest = ptr + real_size;
  1662. if (fio_flags == FIO_LATIN1 || fio_flags == FIO_UTF8)
  1663. {
  1664. p = ptr + size;
  1665. if (fio_flags == FIO_UTF8)
  1666. {
  1667. /* Check for a trailing incomplete UTF-8 sequence */
  1668. tail = ptr + size - 1;
  1669. while (tail > ptr && (*tail & 0xc0) == 0x80)
  1670. --tail;
  1671. if (tail + utf_byte2len(*tail) <= ptr + size)
  1672. tail = NULL;
  1673. else
  1674. p = tail;
  1675. }
  1676. }
  1677. else if (fio_flags & (FIO_UCS2 | FIO_UTF16))
  1678. {
  1679. /* Check for a trailing byte */
  1680. p = ptr + (size & ~1);
  1681. if (size & 1)
  1682. tail = p;
  1683. if ((fio_flags & FIO_UTF16) && p > ptr)
  1684. {
  1685. /* Check for a trailing leading word */
  1686. if (fio_flags & FIO_ENDIAN_L)
  1687. {
  1688. u8c = (*--p << 8);
  1689. u8c += *--p;
  1690. }
  1691. else
  1692. {
  1693. u8c = *--p;
  1694. u8c += (*--p << 8);
  1695. }
  1696. if (u8c >= 0xd800 && u8c <= 0xdbff)
  1697. tail = p;
  1698. else
  1699. p += 2;
  1700. }
  1701. }
  1702. else /* FIO_UCS4 */
  1703. {
  1704. /* Check for trailing 1, 2 or 3 bytes */
  1705. p = ptr + (size & ~3);
  1706. if (size & 3)
  1707. tail = p;
  1708. }
  1709. /* If there is a trailing incomplete sequence move it to
  1710. * conv_rest[]. */
  1711. if (tail != NULL)
  1712. {
  1713. conv_restlen = (int)((ptr + size) - tail);
  1714. mch_memmove(conv_rest, (char_u *)tail, conv_restlen);
  1715. size -= conv_restlen;
  1716. }
  1717. while (p > ptr)
  1718. {
  1719. if (fio_flags & FIO_LATIN1)
  1720. u8c = *--p;
  1721. else if (fio_flags & (FIO_UCS2 | FIO_UTF16))
  1722. {
  1723. if (fio_flags & FIO_ENDIAN_L)
  1724. {
  1725. u8c = (*--p << 8);
  1726. u8c += *--p;
  1727. }
  1728. else
  1729. {
  1730. u8c = *--p;
  1731. u8c += (*--p << 8);
  1732. }
  1733. if ((fio_flags & FIO_UTF16)
  1734. && u8c >= 0xdc00 && u8c <= 0xdfff)
  1735. {
  1736. int u16c;
  1737. if (p == ptr)
  1738. {
  1739. /* Missing leading word. */
  1740. if (can_retry)
  1741. goto rewind_retry;
  1742. i