PageRenderTime 49ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/ase/test/fio/test_cif.py

https://gitlab.com/oschuett/ase
Python | 559 lines | 550 code | 8 blank | 1 comment | 5 complexity | 6583121032369246196d7a88193f61ec MD5 | raw file
  1. import io
  2. import numpy as np
  3. import warnings
  4. import pytest
  5. from ase import Atoms
  6. from ase.build import molecule
  7. from ase.io import read, write
  8. from ase.io.cif import CIFLoop, parse_loop, NoStructureData, parse_cif
  9. from ase.calculators.calculator import compare_atoms
  10. def check_fractional_occupancies(atoms):
  11. """ Checks fractional occupancy entries in atoms.info dict """
  12. assert atoms.info['occupancy']
  13. assert list(atoms.arrays['spacegroup_kinds'])
  14. occupancies = atoms.info['occupancy']
  15. for key in occupancies:
  16. assert isinstance(key, str)
  17. kinds = atoms.arrays['spacegroup_kinds']
  18. for a in atoms:
  19. a_index_str = str(kinds[a.index])
  20. if a.symbol == 'Na':
  21. assert len(occupancies[a_index_str]) == 2
  22. assert occupancies[a_index_str]['K'] == 0.25
  23. assert occupancies[a_index_str]['Na'] == 0.75
  24. else:
  25. assert len(occupancies[a_index_str]) == 1
  26. if a.symbol == 'Cl':
  27. assert occupancies[a_index_str]['Cl'] == 0.3
# CIF fixture used by test_cif() and the cif_atoms fixture: a single data
# block with a 5.5 x 5.5 x 5.5 cell (all angles 90), space group
# 'F m -3 m' (number 225) given together with an explicit list of symmetry
# operations, and four atom sites (Na, K, Cl, I) with occupancies
# 0.75, 0.25, 0.30 and 0.50.  Na and K share the same fractional position.
content = """
data_1
_chemical_name_common 'Mysterious something'
_cell_length_a 5.50000
_cell_length_b 5.50000
_cell_length_c 5.50000
_cell_angle_alpha 90
_cell_angle_beta 90
_cell_angle_gamma 90
_space_group_name_H-M_alt 'F m -3 m'
_space_group_IT_number 225
loop_
_space_group_symop_operation_xyz
'x, y, z'
'-x, -y, -z'
'-x, -y, z'
'x, y, -z'
'-x, y, -z'
'x, -y, z'
'x, -y, -z'
'-x, y, z'
'z, x, y'
'-z, -x, -y'
'z, -x, -y'
'-z, x, y'
'-z, -x, y'
'z, x, -y'
'-z, x, -y'
'z, -x, y'
'y, z, x'
'-y, -z, -x'
'-y, z, -x'
'y, -z, x'
'y, -z, -x'
'-y, z, x'
'-y, -z, x'
'y, z, -x'
'y, x, -z'
'-y, -x, z'
'-y, -x, -z'
'y, x, z'
'y, -x, z'
'-y, x, -z'
'-y, x, z'
'y, -x, -z'
'x, z, -y'
'-x, -z, y'
'-x, z, y'
'x, -z, -y'
'-x, -z, -y'
'x, z, y'
'x, -z, y'
'-x, z, -y'
'z, y, -x'
'-z, -y, x'
'z, -y, x'
'-z, y, -x'
'-z, y, x'
'z, -y, -x'
'-z, -y, -x'
'z, y, x'
'x, y+1/2, z+1/2'
'-x, -y+1/2, -z+1/2'
'-x, -y+1/2, z+1/2'
'x, y+1/2, -z+1/2'
'-x, y+1/2, -z+1/2'
'x, -y+1/2, z+1/2'
'x, -y+1/2, -z+1/2'
'-x, y+1/2, z+1/2'
'z, x+1/2, y+1/2'
'-z, -x+1/2, -y+1/2'
'z, -x+1/2, -y+1/2'
'-z, x+1/2, y+1/2'
'-z, -x+1/2, y+1/2'
'z, x+1/2, -y+1/2'
'-z, x+1/2, -y+1/2'
'z, -x+1/2, y+1/2'
'y, z+1/2, x+1/2'
'-y, -z+1/2, -x+1/2'
'-y, z+1/2, -x+1/2'
'y, -z+1/2, x+1/2'
'y, -z+1/2, -x+1/2'
'-y, z+1/2, x+1/2'
'-y, -z+1/2, x+1/2'
'y, z+1/2, -x+1/2'
'y, x+1/2, -z+1/2'
'-y, -x+1/2, z+1/2'
'-y, -x+1/2, -z+1/2'
'y, x+1/2, z+1/2'
'y, -x+1/2, z+1/2'
'-y, x+1/2, -z+1/2'
'-y, x+1/2, z+1/2'
'y, -x+1/2, -z+1/2'
'x, z+1/2, -y+1/2'
'-x, -z+1/2, y+1/2'
'-x, z+1/2, y+1/2'
'x, -z+1/2, -y+1/2'
'-x, -z+1/2, -y+1/2'
'x, z+1/2, y+1/2'
'x, -z+1/2, y+1/2'
'-x, z+1/2, -y+1/2'
'z, y+1/2, -x+1/2'
'-z, -y+1/2, x+1/2'
'z, -y+1/2, x+1/2'
'-z, y+1/2, -x+1/2'
'-z, y+1/2, x+1/2'
'z, -y+1/2, -x+1/2'
'-z, -y+1/2, -x+1/2'
'z, y+1/2, x+1/2'
'x+1/2, y, z+1/2'
'-x+1/2, -y, -z+1/2'
'-x+1/2, -y, z+1/2'
'x+1/2, y, -z+1/2'
'-x+1/2, y, -z+1/2'
'x+1/2, -y, z+1/2'
'x+1/2, -y, -z+1/2'
'-x+1/2, y, z+1/2'
'z+1/2, x, y+1/2'
'-z+1/2, -x, -y+1/2'
'z+1/2, -x, -y+1/2'
'-z+1/2, x, y+1/2'
'-z+1/2, -x, y+1/2'
'z+1/2, x, -y+1/2'
'-z+1/2, x, -y+1/2'
'z+1/2, -x, y+1/2'
'y+1/2, z, x+1/2'
'-y+1/2, -z, -x+1/2'
'-y+1/2, z, -x+1/2'
'y+1/2, -z, x+1/2'
'y+1/2, -z, -x+1/2'
'-y+1/2, z, x+1/2'
'-y+1/2, -z, x+1/2'
'y+1/2, z, -x+1/2'
'y+1/2, x, -z+1/2'
'-y+1/2, -x, z+1/2'
'-y+1/2, -x, -z+1/2'
'y+1/2, x, z+1/2'
'y+1/2, -x, z+1/2'
'-y+1/2, x, -z+1/2'
'-y+1/2, x, z+1/2'
'y+1/2, -x, -z+1/2'
'x+1/2, z, -y+1/2'
'-x+1/2, -z, y+1/2'
'-x+1/2, z, y+1/2'
'x+1/2, -z, -y+1/2'
'-x+1/2, -z, -y+1/2'
'x+1/2, z, y+1/2'
'x+1/2, -z, y+1/2'
'-x+1/2, z, -y+1/2'
'z+1/2, y, -x+1/2'
'-z+1/2, -y, x+1/2'
'z+1/2, -y, x+1/2'
'-z+1/2, y, -x+1/2'
'-z+1/2, y, x+1/2'
'z+1/2, -y, -x+1/2'
'-z+1/2, -y, -x+1/2'
'z+1/2, y, x+1/2'
'x+1/2, y+1/2, z'
'-x+1/2, -y+1/2, -z'
'-x+1/2, -y+1/2, z'
'x+1/2, y+1/2, -z'
'-x+1/2, y+1/2, -z'
'x+1/2, -y+1/2, z'
'x+1/2, -y+1/2, -z'
'-x+1/2, y+1/2, z'
'z+1/2, x+1/2, y'
'-z+1/2, -x+1/2, -y'
'z+1/2, -x+1/2, -y'
'-z+1/2, x+1/2, y'
'-z+1/2, -x+1/2, y'
'z+1/2, x+1/2, -y'
'-z+1/2, x+1/2, -y'
'z+1/2, -x+1/2, y'
'y+1/2, z+1/2, x'
'-y+1/2, -z+1/2, -x'
'-y+1/2, z+1/2, -x'
'y+1/2, -z+1/2, x'
'y+1/2, -z+1/2, -x'
'-y+1/2, z+1/2, x'
'-y+1/2, -z+1/2, x'
'y+1/2, z+1/2, -x'
'y+1/2, x+1/2, -z'
'-y+1/2, -x+1/2, z'
'-y+1/2, -x+1/2, -z'
'y+1/2, x+1/2, z'
'y+1/2, -x+1/2, z'
'-y+1/2, x+1/2, -z'
'-y+1/2, x+1/2, z'
'y+1/2, -x+1/2, -z'
'x+1/2, z+1/2, -y'
'-x+1/2, -z+1/2, y'
'-x+1/2, z+1/2, y'
'x+1/2, -z+1/2, -y'
'-x+1/2, -z+1/2, -y'
'x+1/2, z+1/2, y'
'x+1/2, -z+1/2, y'
'-x+1/2, z+1/2, -y'
'z+1/2, y+1/2, -x'
'-z+1/2, -y+1/2, x'
'z+1/2, -y+1/2, x'
'-z+1/2, y+1/2, -x'
'-z+1/2, y+1/2, x'
'z+1/2, -y+1/2, -x'
'-z+1/2, -y+1/2, -x'
'z+1/2, y+1/2, x'
loop_
_atom_site_label
_atom_site_occupancy
_atom_site_fract_x
_atom_site_fract_y
_atom_site_fract_z
_atom_site_adp_type
_atom_site_B_iso_or_equiv
_atom_site_type_symbol
Na 0.7500 0.000000 0.000000 0.000000 Biso 1.000000 Na
K 0.2500 0.000000 0.000000 0.000000 Biso 1.000000 K
Cl 0.3000 0.500000 0.500000 0.500000 Biso 1.000000 Cl
I 0.5000 0.250000 0.250000 0.250000 Biso 1.000000 I
"""
  247. def test_cif():
  248. cif_file = io.StringIO(content)
  249. # legacy behavior is to not read the K atoms
  250. with warnings.catch_warnings():
  251. warnings.simplefilter("ignore")
  252. atoms_leg = read(cif_file, format='cif', fractional_occupancies=False)
  253. elements = np.unique(atoms_leg.get_atomic_numbers())
  254. for n in (11, 17, 53):
  255. assert n in elements
  256. try:
  257. atoms_leg.info['occupancy']
  258. raise AssertionError
  259. except KeyError:
  260. pass
  261. cif_file = io.StringIO(content)
  262. # new behavior is to still not read the K atoms, but build info
  263. atoms = read(cif_file, format='cif', fractional_occupancies=True)
  264. # yield the same old atoms for fractional_occupancies case
  265. assert len(atoms_leg) == len(atoms)
  266. assert np.all(atoms_leg.get_atomic_numbers() == atoms.get_atomic_numbers())
  267. assert atoms_leg == atoms
  268. elements = np.unique(atoms_leg.get_atomic_numbers())
  269. for n in (11, 17, 53):
  270. assert n in elements
  271. check_fractional_occupancies(atoms)
  272. # read/write
  273. fname = 'testfile.cif'
  274. with open(fname, 'wb') as fd:
  275. write(fd, atoms, format='cif')
  276. with open(fname) as fd:
  277. atoms = read(fd, format='cif', fractional_occupancies=True)
  278. check_fractional_occupancies(atoms)
  279. # check repeating atoms
  280. atoms = atoms.repeat([2, 1, 1])
  281. assert len(atoms.arrays['spacegroup_kinds']) == len(atoms.arrays['numbers'])
# ICSD-like file from issue #293.
# Differences from the first fixture visible here: numeric values carry
# parenthesised uncertainties (e.g. 9.378(5)) or a bare trailing dot
# (e.g. 90.), and the atom type symbols carry oxidation states
# (Sn2+, As4+, Se2-).
content2 = """
data_global
_cell_length_a 9.378(5)
_cell_length_b 7.488(5)
_cell_length_c 6.513(5)
_cell_angle_alpha 90.
_cell_angle_beta 91.15(5)
_cell_angle_gamma 90.
_cell_volume 457.27
_cell_formula_units_Z 2
_symmetry_space_group_name_H-M 'P 1 n 1'
_symmetry_Int_Tables_number 7
_refine_ls_R_factor_all 0.071
loop_
_symmetry_equiv_pos_site_id
_symmetry_equiv_pos_as_xyz
1 'x+1/2, -y, z+1/2'
2 'x, y, z'
loop_
_atom_type_symbol
_atom_type_oxidation_number
Sn2+ 2
As4+ 4
Se2- -2
loop_
_atom_site_label
_atom_site_type_symbol
_atom_site_symmetry_multiplicity
_atom_site_Wyckoff_symbol
_atom_site_fract_x
_atom_site_fract_y
_atom_site_fract_z
_atom_site_B_iso_or_equiv
_atom_site_occupancy
_atom_site_attached_hydrogens
Sn1 Sn2+ 2 a 0.5270(2) 0.3856(2) 0.7224(3) 0.0266(4) 1. 0
Sn2 Sn2+ 2 a 0.0279(2) 0.1245(2) 0.7870(2) 0.0209(4) 1. 0
As1 As4+ 2 a 0.6836(4) 0.1608(5) 0.8108(6) 0.0067(7) 1. 0
As2 As4+ 2 a 0.8174(4) 0.6447(6) 0.1908(6) 0.0057(6) 1. 0
Se1 Se2- 2 a 0.4898(4) 0.7511(6) 0.8491(6) 0.0110(6) 1. 0
Se2 Se2- 2 a 0.7788(4) 0.6462(6) 0.2750(6) 0.0097(6) 1. 0
Se3 Se2- 2 a 0.6942(4) 0.0517(5) 0.5921(6) 0.2095(6) 1. 0
Se4 Se2- 2 a 0.0149(4) 0.3437(6) 0.5497(7) 0.1123(7) 1. 0
Se5 Se2- 2 a 0.1147(4) 0.5633(4) 0.3288(6) 0.1078(6) 1. 0
Se6 Se2- 2 a 0.0050(4) 0.4480(6) 0.9025(6) 0.9102(6) 1. 0
"""
  329. def test_cif_icsd():
  330. cif_file = io.StringIO(content2)
  331. atoms = read(cif_file, format='cif')
  332. # test something random so atoms is not unused
  333. assert 'occupancy' in atoms.info
  334. @pytest.fixture
  335. def cif_atoms():
  336. cif_file = io.StringIO(content)
  337. return read(cif_file, format='cif')
  338. def test_cif_loop_keys(cif_atoms):
  339. data = {}
  340. # test case has 20 entries
  341. data['someKey'] = [[str(i) + "test" for i in range(20)]]
  342. # test case has 20 entries
  343. data['someIntKey'] = [[str(i) + "123" for i in range(20)]]
  344. cif_atoms.write('testfile.cif', loop_keys=data)
  345. atoms1 = read('testfile.cif', store_tags=True)
  346. # keys are read lowercase only
  347. r_data = {'someKey': atoms1.info['_somekey'],
  348. 'someIntKey': atoms1.info['_someintkey']}
  349. assert r_data['someKey'] == data['someKey'][0]
  350. # data reading auto converts strins
  351. assert r_data['someIntKey'] == [int(x) for x in data['someIntKey'][0]]
  352. # test if automatic numbers written after elements are correct
  353. def test_cif_writer_label_numbers(cif_atoms):
  354. cif_atoms.write('testfile.cif')
  355. atoms1 = read('testfile.cif', store_tags=True)
  356. labels = atoms1.info['_atom_site_label']
  357. # cannot use atoms.symbols as K is missing there
  358. elements = atoms1.info['_atom_site_type_symbol']
  359. build_labels = [
  360. "{:}{:}".format(
  361. x, i) for x in set(elements) for i in range(
  362. 1, elements.count(x) + 1)]
  363. assert build_labels.sort() == labels.sort()
  364. def test_cif_labels(cif_atoms):
  365. data = [["label" + str(i) for i in range(20)]] # test case has 20 entries
  366. cif_atoms.write('testfile.cif', labels=data)
  367. atoms1 = read('testfile.cif', store_tags=True)
  368. print(atoms1.info)
  369. assert data[0] == atoms1.info['_atom_site_label']
  370. def test_cifloop():
  371. dct = {'_eggs': range(4),
  372. '_potatoes': [1.3, 7.1, -1, 0]}
  373. loop = CIFLoop()
  374. loop.add('_eggs', dct['_eggs'], '{:<2d}')
  375. loop.add('_potatoes', dct['_potatoes'], '{:.4f}')
  376. string = loop.tostring() + '\n\n'
  377. lines = string.splitlines()[::-1]
  378. assert lines.pop() == 'loop_'
  379. newdct = parse_loop(lines)
  380. print(newdct)
  381. assert set(dct) == set(newdct)
  382. for name in dct:
  383. assert dct[name] == pytest.approx(newdct[name])
  384. @pytest.mark.parametrize('data', [b'', b'data_dummy'])
  385. def test_empty_or_atomless(data):
  386. ciffile = io.BytesIO(data)
  387. images = read(ciffile, index=':', format='cif')
  388. assert len(images) == 0
  389. def test_empty_or_atomless_cifblock():
  390. ciffile = io.BytesIO(b'data_dummy')
  391. blocks = list(parse_cif(ciffile))
  392. assert len(blocks) == 1
  393. assert not blocks[0].has_structure()
  394. with pytest.raises(NoStructureData):
  395. blocks[0].get_atoms()
  396. def test_symbols_questionmark():
  397. ciffile = io.BytesIO(
  398. b'data_dummy\n'
  399. b'loop_\n'
  400. b'_atom_site_label\n'
  401. b'?\n')
  402. blocks = list(parse_cif(ciffile))
  403. assert not blocks[0].has_structure()
  404. with pytest.raises(NoStructureData, match='undetermined'):
  405. blocks[0].get_atoms()
  406. def test_bad_occupancies(cif_atoms):
  407. assert 'Au' not in cif_atoms.symbols
  408. cif_atoms.symbols[0] = 'Au'
  409. with pytest.warns(UserWarning, match='no occupancy info'):
  410. write('tmp.cif', cif_atoms)
  411. @pytest.mark.parametrize(
  412. 'setting_name, ref_setting',
  413. [
  414. ('hexagonal', 1),
  415. ('trigonal', 2),
  416. ('rhombohedral', 2)
  417. ]
  418. )
  419. def test_spacegroup_named_setting(setting_name, ref_setting):
  420. """The rhombohedral crystal system signifies setting=2"""
  421. ciffile = io.BytesIO("""\
  422. data_test
  423. _space_group_crystal_system {}
  424. _symmetry_space_group_name_H-M 'R-3m'
  425. """.format(setting_name).encode('ascii'))
  426. blocks = list(parse_cif(ciffile))
  427. assert len(blocks) == 1
  428. spg = blocks[0].get_spacegroup(False)
  429. assert int(spg) == 166
  430. assert spg.setting == ref_setting
  431. @pytest.fixture
  432. def atoms():
  433. return Atoms('CO', cell=[2., 3., 4., 50., 60., 70.], pbc=True,
  434. scaled_positions=[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]])
  435. def roundtrip(atoms):
  436. from ase.io.bytes import to_bytes, parse_atoms
  437. buf = to_bytes(atoms, format='cif')
  438. return parse_atoms(buf, format='cif')
  439. def test_cif_roundtrip_periodic(atoms):
  440. # Reading and writing the cell loses the rotation information,
  441. # but preserves cellpar and scaled positions.
  442. atoms1 = roundtrip(atoms)
  443. assert str(atoms1.symbols) == 'CO'
  444. assert all(atoms1.pbc)
  445. assert atoms.cell.cellpar() == pytest.approx(
  446. atoms1.cell.cellpar(), abs=1e-5)
  447. assert atoms.get_scaled_positions() == pytest.approx(
  448. atoms1.get_scaled_positions(), abs=1e-5)
  449. def test_cif_roundtrip_nonperiodic():
  450. atoms = molecule('H2O')
  451. atoms1 = roundtrip(atoms)
  452. assert not compare_atoms(atoms, atoms1, tol=1e-5)
  453. def test_cif_missingvector(atoms):
  454. # We don't know any way to represent only 2 cell vectors in CIF.
  455. # So we discard them and warn the user.
  456. atoms.cell[0] = 0.0
  457. atoms.pbc[0] = False
  458. assert atoms.cell.rank == 2
  459. with pytest.raises(ValueError, match='CIF format can only'):
  460. roundtrip(atoms)
  461. def test_cif_roundtrip_mixed():
  462. atoms = Atoms('Au', cell=[1., 2., 3.], pbc=[1, 1, 0])
  463. atoms1 = roundtrip(atoms)
  464. # We cannot preserve PBC info for this case:
  465. assert all(atoms1.pbc)
  466. assert compare_atoms(atoms, atoms1, tol=1e-5) == ['pbc']
  467. assert atoms.get_scaled_positions() == pytest.approx(
  468. atoms1.get_scaled_positions(), abs=1e-5)
  469. #assert pytest.approx(atoms.positions) == atoms1.positions
  470. #assert atoms1.cell.rank == 0
# CIF input for test_loop_with_space(): a one-column loop ('_hello' with the
# single value 'banana') followed by a plain tag/value pair ('_potato 42').
# NOTE(review): the name implies whitespace following the loop, but no such
# whitespace is visible in this copy of the literal -- confirm the exact
# bytes against the original file before relying on this fixture.
cif_with_whitespace_after_loop = b"""\
data_image0
loop_
_hello
banana
_potato 42
"""
  478. def test_loop_with_space():
  479. # Regression test for https://gitlab.com/ase/ase/-/issues/859 .
  480. buf = io.BytesIO(cif_with_whitespace_after_loop)
  481. blocks = list(parse_cif(buf))
  482. assert len(blocks) == 1
  483. assert blocks[0]['_potato'] == 42