PageRenderTime 137ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/ishtar_common/data_importer.py

https://gitlab.com/cyrilbrulebois/ishtar
Python | 1292 lines | 1221 code | 16 blank | 55 comment | 37 complexity | d032be595de546177baec23ffe0cacf4 MD5 | raw file
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # Copyright (C) 2013-2015 Étienne Loks <etienne.loks_AT_peacefrogsDOTnet>
  4. # This program is free software: you can redistribute it and/or modify
  5. # it under the terms of the GNU Affero General Public License as
  6. # published by the Free Software Foundation, either version 3 of the
  7. # License, or (at your option) any later version.
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU Affero General Public License for more details.
  12. # You should have received a copy of the GNU Affero General Public License
  13. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  14. # See the file COPYING for details.
  15. import copy
  16. import csv
  17. import datetime
  18. import io
  19. import logging
  20. import re
  21. import sys
  22. import zipfile
  23. from django.conf import settings
  24. from django.contrib.auth.models import User
  25. from django.core.files import File
  26. from django.db import IntegrityError, DatabaseError, transaction
  27. from django.template.defaultfilters import slugify
  28. from django.utils.translation import ugettext_lazy as _
  29. NEW_LINE_BREAK = '#####@@@#####'
  30. RE_FILTER_CEDEX = re.compile("(.*) *(?: *CEDEX|cedex|Cedex|Cédex|cédex *\d*)")
  31. class ImportFormater(object):
  32. def __init__(self, field_name, formater=None, required=True, through=None,
  33. through_key=None, through_dict=None,
  34. through_unicity_keys=None, duplicate_fields=[], regexp=None,
  35. regexp_formater_args=[], force_value=None,
  36. post_processing=False, concat=False, concat_str=False,
  37. comment="", force_new=None):
  38. self.field_name = field_name
  39. self.formater = formater
  40. self.required = required
  41. self.through = through
  42. self.through_key = through_key
  43. self.through_dict = through_dict
  44. self.through_unicity_keys = through_unicity_keys
  45. self.duplicate_fields = duplicate_fields
  46. self.regexp = regexp
  47. self.regexp_formater_args = regexp_formater_args
  48. # write this value even if a value exists
  49. self.force_value = force_value
  50. # post process after import
  51. self.post_processing = post_processing
  52. # concatenate with existing value
  53. self.concat = concat
  54. self.concat_str = concat_str
  55. self.comment = comment
  56. self.force_new = force_new
  57. def reinit_db_target(self, db_target, nb=0):
  58. if not self.formater:
  59. return
  60. if type(db_target) in (list, tuple):
  61. db_target = db_target[nb]
  62. if type(self.formater) not in (list, tuple):
  63. self.formater.db_target = db_target
  64. self.formater.init_db_target()
  65. else:
  66. for idx, formater in enumerate(self.formater):
  67. formater.db_target = db_target
  68. formater.init_db_target()
  69. def init_db_target(self):
  70. pass
  71. def __unicode__(self):
  72. return self.field_name
  73. def report_succes(self, *args):
  74. return
  75. def report_error(self, *args):
  76. return
  77. def init(self, vals, output=None, choose_default=False,
  78. import_instance=None):
  79. try:
  80. lst = iter(self.formater)
  81. except TypeError:
  82. lst = [self.formater]
  83. for formater in lst:
  84. if formater:
  85. formater.check(vals, output, self.comment,
  86. choose_default=choose_default,
  87. import_instance=import_instance)
  88. def post_process(self, obj, context, value, owner=None):
  89. raise NotImplemented()
  90. class ImporterError(Exception):
  91. STANDARD = 'S'
  92. HEADER = 'H'
  93. def __init__(self, message, type='S'):
  94. self.msg = message
  95. self.type = type
  96. def __str__(self):
  97. return self.msg
  98. class Formater(object):
  99. def __init__(self, *args, **kwargs):
  100. self.db_target = kwargs.get('db_target', None)
  101. def format(self, value):
  102. return value
  103. def check(self, values, output=None, comment='', choose_default=False,
  104. import_instance=None):
  105. return
  106. def init_db_target(self):
  107. pass
  108. class ChoiceChecker(object):
  109. def report_new(self, comment):
  110. if not self.new_keys:
  111. return
  112. msg = u"For \"%s\" these new associations have been made:\n" % comment
  113. sys.stderr.write(msg.encode('utf-8'))
  114. for k in self.new_keys:
  115. msg = u'"%s";"%s"\n' % (k, self.new_keys[k])
  116. sys.stderr.write(msg.encode('utf-8'))
  117. class UnicodeFormater(Formater):
  118. def __init__(self, max_length=None, clean=False, re_filter=None,
  119. notnull=False, prefix=u'', db_target=None):
  120. self.max_length = max_length
  121. self.db_target = db_target
  122. self.clean = clean
  123. self.re_filter = re_filter
  124. self.notnull = notnull
  125. self.prefix = prefix
  126. def format(self, value):
  127. try:
  128. if type(value) != unicode:
  129. value = unicode(value.strip())
  130. vals = []
  131. for v in value.split(u'\n'):
  132. v = v.strip()
  133. if v:
  134. vals.append(v)
  135. value = u"\n".join(vals)
  136. if self.re_filter:
  137. m = self.re_filter.match(value)
  138. if m:
  139. value = u"".join(m.groups())
  140. if self.clean:
  141. if value.startswith(","):
  142. value = value[1:]
  143. if value.endswith(","):
  144. value = value[:-1]
  145. value = value.replace(", , ", ", ")
  146. except UnicodeDecodeError:
  147. return
  148. if self.max_length and len(value) > self.max_length:
  149. raise ValueError(
  150. _(u"\"%(value)s\" is too long. The max length is %(length)d "
  151. u"characters.") % {'value': value,
  152. 'length': self.max_length})
  153. if self.notnull and not value:
  154. return
  155. if value:
  156. value = self.prefix + value
  157. return value
  158. class BooleanFormater(Formater):
  159. def format(self, value):
  160. value = value.strip().upper()
  161. if value in ('1', 'OUI', 'VRAI', 'YES', 'TRUE'):
  162. return True
  163. if value in ('', '0', 'NON', 'FAUX', 'NO', 'FALSE'):
  164. return False
  165. raise ValueError(_(u"\"%(value)s\" not equal to yes or no") % {
  166. 'value': value})
  167. class FloatFormater(Formater):
  168. def format(self, value):
  169. value = value.strip().replace(',', '.')
  170. if not value:
  171. return
  172. try:
  173. return float(value)
  174. except ValueError:
  175. raise ValueError(_(u"\"%(value)s\" is not a float") % {
  176. 'value': value})
  177. class YearFormater(Formater):
  178. def format(self, value):
  179. value = value.strip()
  180. if not value:
  181. return
  182. try:
  183. value = int(value)
  184. assert value > 0 and value < (datetime.date.today().year + 30)
  185. except (ValueError, AssertionError):
  186. raise ValueError(_(u"\"%(value)s\" is not a valid date") % {
  187. 'value': value})
  188. class YearNoFuturFormater(Formater):
  189. def format(self, value):
  190. value = value.strip()
  191. if not value:
  192. return
  193. try:
  194. value = int(value)
  195. assert value > 0 and value < (datetime.date.today().year)
  196. except (ValueError, AssertionError):
  197. raise ValueError(_(u"\"%(value)s\" is not a valid date") % {
  198. 'value': value})
  199. class IntegerFormater(Formater):
  200. def format(self, value):
  201. value = value.strip()
  202. if not value:
  203. return
  204. try:
  205. return int(value)
  206. except ValueError:
  207. raise ValueError(_(u"\"%(value)s\" is not an integer") % {
  208. 'value': value})
  209. class StrChoiceFormater(Formater, ChoiceChecker):
  210. def __init__(self, choices, strict=False, equiv_dict={}, model=None,
  211. cli=False, many_split='', db_target=None):
  212. self.choices = list(choices)
  213. self.strict = strict
  214. self.equiv_dict = copy.deepcopy(equiv_dict)
  215. self.cli = cli
  216. self.model = model
  217. self.db_target = db_target
  218. self.create = False
  219. self.missings = set()
  220. self.new_keys = {}
  221. self.match_table = {}
  222. self.many_split = many_split
  223. for key, value in self.choices:
  224. value = unicode(value)
  225. if not self.strict:
  226. value = slugify(value)
  227. if value not in self.equiv_dict:
  228. v = key
  229. if model and v:
  230. v = model.objects.get(pk=v)
  231. self.equiv_dict[value] = v
  232. self.init_db_target()
  233. def init_db_target(self):
  234. if not self.db_target:
  235. return
  236. for target_key in self.db_target.keys.filter(is_set=True).all():
  237. key = target_key.key
  238. if not self.strict:
  239. key = slugify(key)
  240. if key in self.equiv_dict:
  241. continue
  242. v = target_key.value
  243. if self.model and v and type(v) in (int, unicode):
  244. try:
  245. v = self.model.objects.get(txt_idx=v)
  246. except:
  247. v = self.model.objects.get(pk=v)
  248. self.equiv_dict[key] = v
  249. def prepare(self, value):
  250. return unicode(value).strip()
  251. def _get_choices(self, comment=''):
  252. msgstr = comment + u" - "
  253. msgstr += unicode(_(u"Choice for \"%s\" is not available. "
  254. u"Which one is relevant?\n"))
  255. idx = -1
  256. for idx, choice in enumerate(self.choices):
  257. msgstr += u"%d. %s\n" % (idx + 1, choice[1])
  258. idx += 2
  259. if self.create:
  260. msgstr += unicode(_(u"%d. None of the above - create new")) % idx \
  261. + u"\n"
  262. idx += 1
  263. msgstr += unicode(_(u"%d. None of the above - skip")) % idx + u"\n"
  264. return msgstr, idx
  265. def check(self, values, output=None, comment='', choose_default=False,
  266. import_instance=None):
  267. from ishtar_common.models import TargetKey
  268. if self.db_target:
  269. q = {'target': self.db_target,
  270. 'associated_import': import_instance,
  271. 'is_set': True
  272. }
  273. for v in self.equiv_dict:
  274. q['key'] = v
  275. value = self.equiv_dict[v]
  276. if hasattr(value, 'pk'):
  277. value = value.pk
  278. q['value'] = value
  279. with transaction.commit_on_success():
  280. try:
  281. t, created = TargetKey.objects.get_or_create(**q)
  282. except IntegrityError:
  283. pass
  284. if (not output or output == 'silent') and not choose_default:
  285. return
  286. if self.many_split:
  287. new_values = []
  288. r = re.compile(self.many_split)
  289. for value in values:
  290. new_values += r.split(value)
  291. values = new_values
  292. for value in values:
  293. base_value = copy.copy(value)
  294. value = self.prepare(value)
  295. if value in self.equiv_dict:
  296. continue
  297. if output != 'cli' and not choose_default:
  298. self.missings.add(value)
  299. continue
  300. msgstr, idx = self._get_choices(comment)
  301. res = None
  302. if choose_default:
  303. res = 1
  304. while res not in range(1, idx + 1):
  305. msg = msgstr % value
  306. sys.stdout.write(msg.encode('utf-8'))
  307. sys.stdout.write("\n>>> ")
  308. res = raw_input()
  309. try:
  310. res = int(res)
  311. except ValueError:
  312. pass
  313. res -= 1
  314. if res < len(self.choices):
  315. v = self.choices[res][0]
  316. if self.model and v:
  317. v = self.model.objects.get(pk=v)
  318. self.equiv_dict[value] = v
  319. self.add_key(v, value)
  320. self.new_keys[value] = v
  321. elif self.create and res == len(self.choices):
  322. self.equiv_dict[value] = self.new(base_value)
  323. self.choices.append((self.equiv_dict[value].pk,
  324. unicode(self.equiv_dict[value])))
  325. self.new_keys[value] = unicode(self.equiv_dict[value])
  326. else:
  327. self.equiv_dict[value] = None
  328. if self.equiv_dict[value] and self.db_target:
  329. from ishtar_common.models import TargetKey
  330. q = {'target': self.db_target, 'key': value,
  331. 'associated_import': import_instance,
  332. }
  333. query = TargetKey.objects.filter(**q)
  334. if query.count():
  335. target = query.all()[0]
  336. target.value = self.equiv_dict[value]
  337. target.is_set = True
  338. target.save()
  339. else:
  340. with transaction.commit_on_success():
  341. q['value'] = self.equiv_dict[value]
  342. q['is_set'] = True
  343. try:
  344. TargetKey.objects.create(**q)
  345. except IntegrityError:
  346. pass
  347. if output == 'db' and self.db_target:
  348. from ishtar_common.models import TargetKey
  349. for missing in self.missings:
  350. q = {'target': self.db_target, 'key': missing,
  351. 'associated_import': import_instance}
  352. if TargetKey.objects.filter(**q).count():
  353. continue
  354. with transaction.commit_on_success():
  355. try:
  356. TargetKey.objects.create(**q)
  357. except IntegrityError:
  358. pass
  359. if output == 'cli':
  360. self.report_new(comment)
  361. def new(self, value):
  362. return
  363. def add_key(self, obj, value):
  364. return
  365. def format(self, value):
  366. origin_value = value
  367. value = self.prepare(value)
  368. if not self.strict:
  369. value = slugify(value)
  370. if value in self.equiv_dict:
  371. self.match_table[origin_value] = self.equiv_dict[value] or ''
  372. return self.equiv_dict[value]
  373. class TypeFormater(StrChoiceFormater):
  374. def __init__(self, model, cli=False, defaults={}, many_split=False,
  375. db_target=None):
  376. self.create = True
  377. self.strict = False
  378. self.model = model
  379. self.defaults = defaults
  380. self.many_split = many_split
  381. self.db_target = db_target
  382. self.missings = set()
  383. self.equiv_dict, self.choices = {}, []
  384. self.match_table = {}
  385. self.new_keys = {}
  386. for item in model.objects.all():
  387. self.choices.append((item.pk, unicode(item)))
  388. for key in item.get_keys():
  389. self.equiv_dict[key] = item
  390. def prepare(self, value):
  391. return slugify(unicode(value).strip())
  392. def add_key(self, obj, value):
  393. obj.add_key(slugify(value), force=True)
  394. def new(self, value):
  395. values = copy.copy(self.defaults)
  396. values['label'] = value
  397. values['txt_idx'] = slugify(value)
  398. if 'order' in self.model._meta.get_all_field_names():
  399. order = 1
  400. q = self.model.objects.values('order').order_by('-order')
  401. if q.count():
  402. order = q.all()[0]['order'] or 1
  403. values['order'] = order
  404. return self.model.objects.create(**values)
  405. class DateFormater(Formater):
  406. def __init__(self, date_formats=["%d/%m/%Y"], db_target=None):
  407. self.date_formats = date_formats
  408. if type(date_formats) not in (list, tuple):
  409. self.date_formats = [self.date_formats]
  410. self.db_target = db_target
  411. def format(self, value):
  412. value = value.strip()
  413. if not value:
  414. return
  415. for date_format in self.date_formats:
  416. try:
  417. return datetime.datetime.strptime(value, date_format).date()
  418. except:
  419. continue
  420. raise ValueError(_(u"\"%(value)s\" is not a valid date") % {
  421. 'value': value})
  422. class FileFormater(Formater):
  423. need_archive = True
  424. def format(self, value, archive):
  425. value = value.strip()
  426. if not value:
  427. return
  428. zp = zipfile.ZipFile(archive)
  429. value = value.strip().replace(u'\\', u'/')
  430. items = value.replace(u'/', u'_').split(u'.')
  431. filename = settings.MEDIA_ROOT + 'imported/' + \
  432. u".".join(items[:-1]) + u'.' + items[-1]
  433. try:
  434. with open(filename, 'w') as f:
  435. with zp.open(value) as z:
  436. f.write(z.read())
  437. f = open(filename, 'r')
  438. my_file = File(f)
  439. return my_file
  440. except KeyError:
  441. raise ValueError(_(u"\"%(value)s\" is not a valid path for the "
  442. u"given archive") % {'value': value})
  443. class StrToBoolean(Formater, ChoiceChecker):
  444. def __init__(self, choices={}, cli=False, strict=False, db_target=None):
  445. self.dct = copy.copy(choices)
  446. self.cli = cli
  447. self.strict = strict
  448. self.db_target = db_target
  449. self.missings = set()
  450. self.init_db_target()
  451. self.match_table = {}
  452. self.new_keys = {}
  453. def init_db_target(self):
  454. if not self.db_target:
  455. return
  456. for target_key in self.db_target.keys.filter(is_set=True).all():
  457. key = self.prepare(target_key.key)
  458. if key in self.dct:
  459. continue
  460. v = target_key.format()
  461. self.dct[key] = v
  462. def prepare(self, value):
  463. value = unicode(value).strip()
  464. if not self.strict:
  465. value = slugify(value)
  466. return value
  467. def check(self, values, output=None, comment='', choose_default=False,
  468. import_instance=None):
  469. if (not output or output == 'silent') and not choose_default:
  470. return
  471. msgstr = comment + u" - "
  472. msgstr += unicode(_(
  473. u"Choice for \"%s\" is not available. "
  474. u"Which one is relevant?\n"))
  475. msgstr += u"1. True\n"
  476. msgstr += u"2. False\n"
  477. msgstr += u"3. Empty\n"
  478. for value in values:
  479. value = self.prepare(value)
  480. if value in self.dct:
  481. continue
  482. if output != 'cli' and not choose_default:
  483. self.missings.add(value)
  484. continue
  485. res = None
  486. if choose_default:
  487. res = 1
  488. while res not in range(1, 4):
  489. msg = msgstr % value
  490. sys.stdout.write(msg.encode('utf-8'))
  491. sys.stdout.write("\n>>> ")
  492. res = raw_input()
  493. try:
  494. res = int(res)
  495. except ValueError:
  496. pass
  497. if res == 1:
  498. self.dct[value] = True
  499. elif res == 2:
  500. self.dct[value] = False
  501. else:
  502. self.dct[value] = None
  503. self.new_keys[value] = unicode(self.dct[value])
  504. if output == 'db' and self.db_target:
  505. from ishtar_common.models import TargetKey
  506. for missing in self.missings:
  507. try:
  508. q = {'target': self.db_target, 'key': missing,
  509. 'associated_import': import_instance}
  510. if not TargetKey.objects.filter(**q).count():
  511. TargetKey.objects.create(**q)
  512. except IntegrityError:
  513. pass
  514. if output == 'cli':
  515. self.report_new(comment)
  516. def format(self, value):
  517. origin_value = value
  518. value = self.prepare(value)
  519. if value in self.dct:
  520. val = self.dct[value] and "True" or "False"
  521. self.match_table[origin_value] = _(val)
  522. return self.dct[value]
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
  524. def get_object_from_path(obj, path):
  525. for k in path.split('__')[:-1]:
  526. if not hasattr(obj, k):
  527. return
  528. obj = getattr(obj, k)
  529. return obj
class Importer(object):
    # Identification of the importer: SLUG is used to look up the matching
    # ImporterType, NAME (or SLUG when empty) and DESC describe it.
    SLUG = ''
    NAME = ''
    DESC = ""
    # One entry per column (an ImportFormater-like object, or a falsy value
    # for a column that is ignored).
    LINE_FORMAT = []
    # Model class of the objects created or updated by the import.
    OBJECT_CLS = None
    # Name of the field receiving the raw imported line (optional).
    IMPORTED_LINE_FIELD = None
    # Keys used to identify an existing object; joined with ";" when stored.
    UNICITY_KEYS = []
    # Default values by target; EXTRA_DEFAULTS is merged on top of DEFAULTS
    # at instance creation.
    EXTRA_DEFAULTS = {}
    DEFAULTS = {}
    # Translatable error messages, by error code.
    ERRORS = {
        'header_check': _(
            u"The given file is not correct. Check the file "
            u"format. If you use a CSV file: check that column separator "
            u"and encoding are similar to the ones used by the reference "
            u"file."),
        'too_many_cols': _(u"Too many cols (%(user_col)d) when "
                           u"maximum is %(ref_col)d"),
        'no_data': _(u"No data provided"),
        'value_required': _(u"Value is required"),
        'not_enough_cols': _(u"At least %d columns must be filled"),
        'regex_not_match': _(u"The regexp doesn't match.")
    }
  553. def _create_models(self, force=False):
  554. from ishtar_common import models
  555. q = models.ImporterType.objects.filter(slug=self.SLUG)
  556. if not force and (not self.SLUG or q.count()):
  557. return
  558. if force and q.count():
  559. q.all()[0].delete()
  560. name = self.NAME if self.NAME else self.SLUG
  561. model_name = self.OBJECT_CLS.__module__ + '.' + \
  562. self.OBJECT_CLS.__name__
  563. unicity_keys = ''
  564. if self.UNICITY_KEYS:
  565. unicity_keys = ";".join(self.UNICITY_KEYS)
  566. importer = models.ImporterType.objects.create(
  567. slug=self.SLUG, name=name, description=self.DESC,
  568. associated_models=model_name, unicity_keys=unicity_keys)
  569. for default in self.DEFAULTS:
  570. values = self.DEFAULTS[default]
  571. imp_default = models.ImporterDefault.objects.create(
  572. importer_type=importer,
  573. target='__'.join(default))
  574. for key in values:
  575. if key in ('history_modifier',):
  576. continue
  577. value = values[key]
  578. if hasattr(value, 'txt_idx') and value.txt_idx:
  579. value = value.txt_idx
  580. elif hasattr(value, 'pk') and value.pk:
  581. value = value.pk
  582. if callable(value):
  583. value = value()
  584. models.ImporterDefaultValues.objects.create(
  585. default_target=imp_default,
  586. target=key,
  587. value=value)
  588. for idx, line in enumerate(self.line_format):
  589. idx += 1
  590. if not line:
  591. continue
  592. column = models.ImporterColumn.objects.create(
  593. importer_type=importer, col_number=idx)
  594. targets = line.field_name
  595. if type(targets) not in (list, tuple):
  596. targets = [targets]
  597. formaters = line.formater
  598. if type(formaters) not in (list, tuple):
  599. formaters = [formaters]
  600. for idx, target in enumerate(targets):
  601. formater = formaters[idx]
  602. formater_name = formater.__class__.__name__
  603. if formater_name not in models.IMPORTER_TYPES_DCT:
  604. formater_name = 'UnknowType'
  605. options = ''
  606. if formater_name == 'TypeFormater':
  607. options = formater.model.__module__ + '.' + \
  608. formater.model.__name__
  609. elif formater_name == 'UnicodeFormater':
  610. options = unicode(formater.max_length or '')
  611. elif formater_name == 'DateFormater':
  612. options = formater.date_formats[0]
  613. formater_model, created = \
  614. models.FormaterType.objects.get_or_create(
  615. formater_type=formater_name, options=options.strip(),
  616. many_split=getattr(formater, 'many_split', None) or '')
  617. regexp_filter = None
  618. if getattr(formater, 'regexp', None):
  619. regexp_filter, created = \
  620. models.Regexp.objects.get_or_create(
  621. regexp=formater.regex,
  622. defaults={'name': "Default name"})
  623. models.ImportTarget.objects.get_or_create(
  624. column=column, target=target, formater_type=formater_model,
  625. force_new=getattr(formater, 'force_new', False),
  626. concat=getattr(formater, 'concat', False),
  627. concat_str=getattr(formater, 'concat_str', ''),
  628. regexp_filter=regexp_filter,
  629. comment=line.comment)
  630. return True
  631. def __init__(self, skip_lines=0, reference_header=None,
  632. check_col_num=False, test=False, history_modifier=None,
  633. output='silent', import_instance=None,
  634. conservative_import=False):
  635. """
  636. * skip_line must be set if the data provided has got headers lines.
  637. * a reference_header can be provided to perform a data compliance
  638. check. It can be useful to warn about bad parsing.
  639. * test doesn't write in the database
  640. """
  641. self.skip_lines = skip_lines
  642. self.reference_header = reference_header
  643. self.test = test
  644. self.errors = [] # list of (line, col, message)
  645. self.validity = [] # list of (line, col, message)
  646. self.number_updated = 0
  647. self.number_created = 0
  648. self.check_col_num = check_col_num
  649. self.line_format = copy.copy(self.LINE_FORMAT)
  650. self.import_instance = import_instance
  651. self.archive = None
  652. self.conservative_import = conservative_import
  653. # for a conservative_import UNICITY_KEYS should be defined
  654. assert not self.conservative_import or bool(self.UNICITY_KEYS)
  655. self.DB_TARGETS = {}
  656. self.match_table = {}
  657. self.concats = set()
  658. self.concat_str = {}
  659. if import_instance and import_instance.imported_images:
  660. self.archive = import_instance.imported_images
  661. self._defaults = self.DEFAULTS.copy()
  662. # EXTRA_DEFAULTS are for multiple inheritance
  663. if self.EXTRA_DEFAULTS:
  664. for k in self.EXTRA_DEFAULTS:
  665. if k not in self._defaults:
  666. self._defaults[k] = {}
  667. self._defaults[k].update(self.EXTRA_DEFAULTS[k])
  668. self.history_modifier = history_modifier
  669. self.output = output
  670. if not self.history_modifier:
  671. if self.import_instance:
  672. self.history_modifier = self.import_instance.user
  673. else:
  674. # import made by the CLI: get the first admin
  675. self.history_modifier = User.objects.filter(
  676. is_superuser=True).order_by('pk')[0]
  677. def post_processing(self, item, data):
  678. # force django based post-processing for the item
  679. item.save()
  680. if hasattr(item, 'RELATED_POST_PROCESS'):
  681. for related_key in item.RELATED_POST_PROCESS:
  682. for related in getattr(item, related_key).all():
  683. related.save()
  684. return item
  685. def initialize(self, table, output='silent', choose_default=False):
  686. """
  687. copy vals in columns and initialize formaters
  688. * output:
  689. - 'silent': no associations
  690. - 'cli': output by command line interface and stocked in the database
  691. - 'db': output on the database with no interactive association
  692. (further exploitation by web interface)
  693. """
  694. assert output in ('silent', 'cli', 'db')
  695. vals = []
  696. for idx_line, line in enumerate(table):
  697. if self.skip_lines > idx_line:
  698. continue
  699. for idx_col, val in enumerate(line):
  700. if idx_col >= len(self.line_format):
  701. break
  702. if idx_col >= len(vals):
  703. vals.append([])
  704. vals[idx_col].append(val)
  705. for idx, formater in enumerate(self.line_format):
  706. if formater and idx < len(vals):
  707. if self.DB_TARGETS:
  708. field_names = formater.field_name
  709. if type(field_names) not in (list, tuple):
  710. field_names = [field_names]
  711. db_targets = []
  712. for field_name in field_names:
  713. db_targets.append(
  714. self.DB_TARGETS["{}-{}".format(
  715. idx + 1, field_name)])
  716. formater.reinit_db_target(db_targets)
  717. formater.init(vals[idx], output, choose_default=choose_default,
  718. import_instance=self.import_instance)
  719. def importation(self, table, initialize=True, choose_default=False):
  720. if initialize:
  721. self.initialize(table, self.output, choose_default=choose_default)
  722. self._importation(table)
  723. def _associate_db_target_to_formaters(self):
  724. if not self.import_instance:
  725. return
  726. self.DB_TARGETS = {}
  727. from ishtar_common.models import ImporterColumn, ImportTarget
  728. for idx, line in enumerate(self.line_format):
  729. idx += 1
  730. if not line:
  731. continue
  732. col = ImporterColumn.objects.get(
  733. importer_type=self.import_instance.importer_type,
  734. col_number=idx)
  735. formater = line.formater
  736. targets = line.field_name
  737. if type(formater) not in (list, tuple):
  738. formater = [formater]
  739. targets = [targets]
  740. for target in targets:
  741. tg = target
  742. if type(target) == list and type(target[0]) == list:
  743. tg = target[0]
  744. self.DB_TARGETS["{}-{}".format(idx, tg)] = \
  745. ImportTarget.objects.get(column=col, target=tg)
    @classmethod
    def _field_name_to_data_dict(
            cls, field_name, value, data, force_value=False, concat=False,
            concat_str=u"", force_new=False):
        """Write ``value`` into the nested dict ``data`` and return it.

        ``field_name`` is a "__" separated path (or a list/tuple of such
        paths): intermediate keys become nested dicts, the last key receives
        the value according to the options:

        * concat: the value is appended to the existing text, with
          ``concat_str`` as separator (nothing is done for an empty value)
        * force_value: a non empty value replaces the existing one (or is
          joined to it when ``concat_str`` is set)
        * otherwise the value is only written when the key is missing or
          empty, or joined to the existing one when ``concat_str`` is set
        * force_new: the marker ``'__force_new'`` is set in the dict holding
          the last key
        """
        field_names = field_name
        if type(field_names) not in (list, tuple):
            field_names = [field_name]
        for field_name in field_names:
            keys = field_name.split('__')
            # always restart from the root for each path
            current_data = data
            for idx, key in enumerate(keys):
                if idx == (len(keys) - 1):  # last
                    if concat:
                        if key not in current_data:
                            current_data[key] = ""
                        if not value:
                            # nothing to append (force_new is skipped too)
                            continue
                        current_data[key] = (current_data[key] + concat_str) \
                            if current_data[key] else u""
                        current_data[key] += value
                    elif force_value and value:
                        if concat_str and key in current_data \
                                and current_data[key]:
                            current_data[key] = unicode(current_data[key]) + \
                                concat_str + unicode(value)
                        else:
                            current_data[key] = value
                    elif key not in current_data or not current_data[key]:
                        current_data[key] = value
                    elif concat_str:
                        current_data[key] = unicode(current_data[key]) +\
                            concat_str + unicode(value)
                    if force_new:
                        current_data['__force_new'] = True
                elif key not in current_data:
                    current_data[key] = {}
                # go down one level
                current_data = current_data[key]
        return data
  784. def _importation(self, table):
  785. self.match_table = {}
  786. table = list(table)
  787. if not table or not table[0]:
  788. raise ImporterError(self.ERRORS['no_data'], ImporterError.HEADER)
  789. if self.check_col_num and len(table[0]) > len(self.line_format):
  790. raise ImporterError(self.ERRORS['too_many_cols'] % {
  791. 'user_col': len(table[0]), 'ref_col': len(self.line_format)})
  792. self.errors = []
  793. self.validity = []
  794. self.number_imported = 0
  795. # index of the last required column
  796. for idx_last_col, formater in enumerate(reversed(self.line_format)):
  797. if formater and formater.required:
  798. break
  799. else:
  800. idx_last_col += 1
  801. # min col number to be filled
  802. self.min_col_number = len(self.line_format) - idx_last_col
  803. # check the conformity with the reference header
  804. if self.reference_header and \
  805. self.skip_lines and \
  806. self.reference_header != table[0]:
  807. raise ImporterError(self.ERRORS['header_check'],
  808. type=ImporterError.HEADER)
  809. self.now = datetime.datetime.now()
  810. start = datetime.datetime.now()
  811. total = len(table)
  812. if self.output == 'cli':
  813. sys.stdout.write("\n")
  814. for idx_line, line in enumerate(table):
  815. if self.output == 'cli':
  816. left = None
  817. if idx_line > 10:
  818. ellapsed = datetime.datetime.now() - start
  819. time_by_item = ellapsed / idx_line
  820. if time_by_item:
  821. left = ((total - idx_line) * time_by_item).seconds
  822. txt = u"\r* %d/%d" % (idx_line + 1, total)
  823. if left:
  824. txt += u" (%d seconds left)" % left
  825. sys.stdout.write(txt.encode('utf-8'))
  826. sys.stdout.flush()
  827. try:
  828. self._line_processing(idx_line, line)
  829. except ImporterError, msg:
  830. self.errors.append((idx_line, None, msg))
  def _line_processing(self, idx_line, line):
      """
      Read every cell of one source line, then create or update the
      matching object and its "through" relations.

      Lines below ``self.skip_lines`` and empty lines are only recorded
      in ``self.validity``. Nothing is written to the database when a
      cell reported a blocking error, when the line has fewer than
      ``self.min_col_number`` columns or when ``self.test`` is set.

      :param idx_line: 0-based index of the line in the source table
      :param line: sequence of raw cell values for this line
      """
      self.idx_line = idx_line
      if self.skip_lines > idx_line:
          # skipped (header) lines are echoed as is in the validity table
          self.validity.append(line)
          return
      if not line:
          self.validity.append([])
          return
      self._throughs = []  # list of (formater, value)
      self._post_processing = []  # list of (formater, value)
      data = {}

      # keep in database the raw line for testing purpose
      # NOTE(review): under Python 2 the csv module writes byte strings,
      # which io.StringIO is expected to reject - confirm this branch
      # is actually exercised
      if self.IMPORTED_LINE_FIELD:
          output = io.StringIO()
          csv.writer(output).writerow(line)
          data[self.IMPORTED_LINE_FIELD] = output.getvalue()

      n = datetime.datetime.now()
      logger.debug('%s - Processing line %d', n - self.now, idx_line)
      self.now = n
      n2 = n

      self.c_errors = False
      c_row = []
      for idx_col, val in enumerate(line):
          try:
              self._row_processing(c_row, idx_col, idx_line, val, data)
          except Exception:
              # a broken cell must not abort the whole import, but it
              # must leave a trace; KeyboardInterrupt and SystemExit
              # are no longer swallowed
              logger.warning(
                  'Line %d, column %d: unexpected error while processing '
                  'the cell', idx_line + 1, idx_col + 1, exc_info=True)
      self.validity.append(c_row)

      # `line` is not empty here, so idx_col is the index of its last cell
      if not self.c_errors and (idx_col + 1) < self.min_col_number:
          self.c_errors = True
          self.errors.append((
              idx_line + 1, idx_col + 1,
              self.ERRORS['not_enough_cols'] % self.min_col_number))
      if self.c_errors:
          return

      n = datetime.datetime.now()
      logger.debug('* %s - Cols read', n - n2)
      n2 = n
      if self.test:
          return

      # manage unicity of items (mainly for updates)
      if 'history_modifier' in \
              self.OBJECT_CLS._meta.get_all_field_names():
          data['history_modifier'] = self.history_modifier
      obj, created = self.get_object(self.OBJECT_CLS, data)
      if self.import_instance and hasattr(obj, 'imports') and created:
          obj.imports.add(self.import_instance)
      if created:
          self.number_created += 1
      else:
          self.number_updated += 1
      if not created and 'defaults' in data:
          # existing object: overwrite it with the non-unicity values
          for k in data['defaults']:
              setattr(obj, k, data['defaults'][k])
          obj.save()

      n = datetime.datetime.now()
      logger.debug('* %s - Item saved', n - n2)
      n2 = n

      for formater, value in self._throughs:
          n = datetime.datetime.now()
          logger.debug('* %s - Processing formater %s', n - n2,
                       formater.field_name)
          n2 = n
          # NOTE(review): `data` is rebound here, so the post-processing
          # hooks below receive the dict of the last through relation
          # instead of the line data whenever a through relation exists.
          # Behaviour kept as is - confirm it is intended.
          data = {}
          if formater.through_dict:
              data = formater.through_dict.copy()
          if formater.through_key:
              data[formater.through_key] = obj
          data[formater.field_name] = value
          through_cls = formater.through
          if formater.through_unicity_keys:
              data['defaults'] = {}
              # iterate over a snapshot: keys are popped inside the loop
              for k in list(data.keys()):
                  if k not in formater.through_unicity_keys \
                          and k != 'defaults':
                      data['defaults'][k] = data.pop(k)
          if '__force_new' in data:
              created = data.pop('__force_new')
              t_obj = through_cls.objects.create(**data)
          else:
              t_obj, created = through_cls.objects.get_or_create(**data)
          if not created and 'defaults' in data:
              for k in data['defaults']:
                  setattr(t_obj, k, data['defaults'][k])
              t_obj.save()
          if self.import_instance and hasattr(t_obj, 'imports') \
                  and created:
              t_obj.imports.add(self.import_instance)

      for formater, val in self._post_processing:
          formater.post_process(obj, data, val, owner=self.history_modifier)
      obj = self.post_processing(obj, data)
  def _row_processing(self, c_row, idx_col, idx_line, val, data):
      """
      Format one cell with the formater declared for its column.

      The text shown for the cell is appended to ``c_row``. Formatted
      values are stored in ``data`` through
      ``self._field_name_to_data_dict`` or, for "through" formaters,
      queued in ``self._throughs``. Problems are appended to
      ``self.errors`` as (line, column, message) tuples with 1-based
      indexes; ``self.c_errors`` is set when a required value is missing
      or invalid.

      :param c_row: list of displayed cell values for the current line
      :param idx_col: 0-based index of the column
      :param idx_line: 0-based index of the line
      :param val: raw value of the cell
      :param data: dict of values collected for the current line
      """
      if idx_col >= len(self.line_format):
          return
      formater = self.line_format[idx_col]
      if formater and formater.post_processing:
          self._post_processing.append((formater, val))
      if not formater or not formater.field_name:
          c_row.append(_('Not imported'))
          return

      # regex management
      if formater.regexp:
          # multiline regexp is a mess...
          val = val.replace('\n', NEW_LINE_BREAK)
          match = formater.regexp.match(val)
          if not match:
              if formater.required:
                  self.errors.append(
                      (idx_line + 1, idx_col + 1,
                       self.ERRORS['value_required']))
                  self.c_errors = True
              elif not val.strip():
                  # empty optional cell: nothing to report
                  c_row.append("")
                  return
              val = val.replace(NEW_LINE_BREAK, '\n')
              self.errors.append(
                  (idx_line + 1, idx_col + 1,
                   unicode(self.ERRORS['regex_not_match']) + val))
              c_row.append("")
              return
          val_group = [group.replace(NEW_LINE_BREAK, '\n') if group else ''
                       for group in match.groups()]
      else:
          val_group = [val]

      c_values = []
      for idx_v, raw_value in enumerate(val_group):
          func = formater.formater
          if type(func) in (list, tuple):
              func = func[idx_v]
          if not callable(func) and type(func) in (unicode, str):
              # formater given as the name of a method of the importer
              func = getattr(self, func)
          values = [raw_value]
          many_values = getattr(func, 'many_split', None)
          if many_values:
              values = re.split(func.many_split, values[0])

          field_name = formater.field_name
          force_new = formater.force_new
          if type(field_name) in (list, tuple):
              field_name = field_name[idx_v]
          if type(force_new) in (list, tuple):
              force_new = force_new[idx_v]
          # always computed: it is needed below whatever `concat` is
          concat_str = formater.concat_str
          if type(concat_str) in (list, tuple):
              concat_str = concat_str[idx_v]
          if formater.concat:
              self.concats.add(field_name)
              if concat_str:
                  self.concat_str[field_name] = concat_str

          if self.DB_TARGETS:
              formater.reinit_db_target(
                  self.DB_TARGETS["{}-{}".format(idx_col + 1, field_name)],
                  idx_v)

          formated_values = []
          for item in values:
              try:
                  if formater.regexp_formater_args:
                      args = [val_group[idx_group] for idx_group
                              in formater.regexp_formater_args[idx_v]]
                      value = func.format(*args)
                  elif getattr(func, 'need_archive', False):
                      value = func.format(item, archive=self.archive)
                  else:
                      value = func.format(item)
              except ValueError as e:
                  if formater.required:
                      self.c_errors = True
                  self.errors.append(
                      (idx_line + 1, idx_col + 1, e.message))
                  # keep one cell per column in the displayed row
                  c_values.append('')
                  c_row.append(u" ; ".join(c_values))
                  return
              formated_values.append(value)
          if hasattr(func, 'match_table'):
              if field_name not in self.match_table:
                  self.match_table[field_name] = {}
              self.match_table[field_name].update(func.match_table)

          value = formated_values if many_values else formated_values[0]
          printed_values = value
          if type(value) not in (list, tuple):
              printed_values = [value]
          try:
              # don't reunicode - unicoded values
              c_values.append(u" ; ".join(printed_values))
          except TypeError:
              c_values.append(
                  u" ; ".join([unicode(item) for item in printed_values]))

          if value is None and formater.required:
              self.c_errors = True
              self.errors.append((idx_line + 1, idx_col + 1,
                                  self.ERRORS['value_required']))
              # keep one cell per column in the displayed row
              c_row.append(u" ; ".join(c_values))
              return

          # the value may be copied to several fields, each with its
          # own options
          field_names = [field_name]
          force_news = [force_new]
          concats = [formater.concat]
          concat_strs = [concat_str]
          for duplicate_field in formater.duplicate_fields:
              if type(duplicate_field[0]) in (list, tuple):
                  duplicate_field = duplicate_field[idx_v]
              dup_name, dup_force_new, dup_concat, dup_concat_str = \
                  duplicate_field
              field_names.append(dup_name)
              force_news.append(dup_force_new)
              concats.append(dup_concat)
              concat_strs.append(dup_concat_str)

          if formater.through:
              self._throughs.append((formater, value))
          else:
              for idx, target_name in enumerate(field_names):
                  self._field_name_to_data_dict(
                      target_name, value, data, formater.force_value,
                      force_new=force_news[idx], concat=concats[idx],
                      concat_str=concat_strs[idx])
      c_row.append(u" ; ".join(c_values))
  def get_field(self, cls, attribute, data, m2ms, c_path):
      """
      Resolve the value stored in ``data[attribute]`` when ``attribute``
      is a relation of ``cls``.

      Many-to-many values are removed from ``data`` and appended to
      ``m2ms`` as ``(attribute, instance)`` tuples; values given as
      dicts are first turned into instances (created or fetched).
      For other relations a dict value is replaced in ``data`` by the
      object returned by ``self.get_object`` (``None`` when it raises
      ``ImporterError``) and a list value by its first item.

      :param cls: model class owning the attribute
      :param attribute: name of the field to resolve
      :param data: dict of values for ``cls``, modified in place
      :param m2ms: list receiving the ``(attribute, instance)`` tuples
      :param c_path: list of attribute names leading to ``cls``;
          extended in place when a nested object is resolved
      """
      field_object, model, direct, m2m = \
          cls._meta.get_field_by_name(attribute)
      if m2m:
          many_values = data.pop(attribute)
          if hasattr(field_object, 'rel'):
              model = field_object.rel.to
          elif hasattr(field_object, 'to'):
              model = field_object.to
          elif hasattr(field_object, 'model'):
              model = field_object.model
          if type(many_values) not in (list, tuple):
              many_values = [many_values]
          for val in many_values:
              if val.__class__ == model:
                  # already an instance of the related model
                  m2ms.append((attribute, val))
                  continue
              if type(val) != dict:
                  continue
              # construct one dict for each related item
              default_dict = {}
              # # get default values
              p = [attribute]
              if c_path:
                  p = list(c_path) + p
              p = tuple(p)
              if p in self._defaults:
                  default_dict.update(self._defaults[p])
              # # init with simple values that will be duplicated
              for key in val:
                  if type(val[key]) not in (list, tuple):
                      default_dict[key] = val[key]
              vals = [default_dict.copy()]
              # # manage multiple values
              for key in val:
                  if type(val[key]) not in (list, tuple):
                      continue
                  for idx, item_value in enumerate(val[key]):
                      if len(vals) <= idx:
                          vals.append(default_dict.copy())
                      vals[idx][key] = item_value
              # check that m2m are not empty
              if not any(dct[k] not in ("", None)
                         for dct in vals for k in dct):
                  continue
              field_names = model._meta.get_all_field_names()
              for values in vals:
                  if 'history_modifier' in field_names:
                      values.setdefault('defaults', {})[
                          'history_modifier'] = self.history_modifier
                  nested_m2ms = []
                  # snapshot of the keys: the nested call pops the m2m
                  # keys out of `values`
                  for key in list(values.keys()):
                      if key not in field_names:
                          continue
                      # one path copy per key, so that a nested relation
                      # does not leak its name into the path of the
                      # following keys
                      self.get_field(model, key, values, nested_m2ms,
                                     c_path[:])
                  if '__force_new' in values:
                      created = values.pop('__force_new')
                      if not any(values[k] for k in values):
                          continue
                      item = model.objects.create(**values)
                  else:
                      values['defaults'] = values.get('defaults', {})
                      item, created = model.objects.get_or_create(
                          **values)
                  for att, objs in nested_m2ms:
                      if type(objs) not in (list, tuple):
                          objs = [objs]
                      for obj in objs:
                          getattr(item, att).add(obj)
                  if self.import_instance \
                          and hasattr(item, 'imports') and created:
                      item.imports.add(self.import_instance)
                  m2ms.append((attribute, item))
      elif hasattr(field_object, 'rel') and field_object.rel:
          if type(data[attribute]) == dict:
              # put history_modifier for every created item
              if 'history_modifier' in \
                      field_object.rel.to._meta.get_all_field_names():
                  data[attribute]['history_modifier'] = \
                      self.history_modifier
              try:
                  c_path.append(attribute)
                  data[attribute], created = self.get_object(
                      field_object.rel.to, data[attribute].copy(), c_path)
              except ImporterError as msg:
                  self.errors.append((self.idx_line, None, msg))
                  data[attribute] = None
          elif type(data[attribute]) == list:
              # only the first value is kept for a single relation
              data[attribute] = data[attribute][0]
  1150. def get_object(self, cls, data, path=[]):
  1151. m2ms = []
  1152. if data and type(data) == dict:
  1153. c_path = path[:]
  1154. # get all related fields
  1155. for attribute in list(data.keys()):
  1156. c_c_path = c_path[:]
  1157. if not attribute:
  1158. data.pop(attribute)
  1159. continue
  1160. if not data[attribute]:
  1161. continue
  1162. if attribute != '__force_new':
  1163. self.get_field(cls, attribute, data, m2ms, c_c_path)
  1164. # filter uncessary default values
  1165. create_dict = copy.deepcopy(data)
  1166. for k in create_dict.keys():
  1167. if type(create_dict[k]) == dict:
  1168. create_dict.pop(k)
  1169. # default values
  1170. path = tuple(path)
  1171. defaults = {}
  1172. if path in self._defaults:
  1173. for k in self._defaults[path]: