PageRenderTime 44ms CodeModel.GetById 11ms RepoModel.GetById 0ms app.codeStats 0ms

/corehq/apps/userreports/models.py

https://github.com/dimagi/commcare-hq
Python | 1361 lines | 1301 code | 29 blank | 31 comment | 7 complexity | b26c157e3f7dc2027b4d138c78514639 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1
  1. import glob
  2. import json
  3. import os
  4. import re
  5. from collections import namedtuple
  6. from copy import copy, deepcopy
  7. from corehq import toggles
  8. from datetime import datetime
  9. from uuid import UUID
  10. from django.conf import settings
  11. from django.contrib.postgres.fields import ArrayField
  12. from django.core.serializers.json import DjangoJSONEncoder
  13. from django.db import models
  14. from django.utils.functional import cached_property
  15. from django.utils.translation import gettext as _
  16. import yaml
  17. from couchdbkit.exceptions import BadValueError
  18. from django_bulk_update.helper import bulk_update as bulk_update_helper
  19. from jsonpath_ng.ext import parser
  20. from memoized import memoized
  21. from corehq.apps.domain.models import AllowedUCRExpressionSettings
  22. from dimagi.ext.couchdbkit import (
  23. BooleanProperty,
  24. DateTimeProperty,
  25. DecimalProperty,
  26. DictProperty,
  27. Document,
  28. DocumentSchema,
  29. IntegerProperty,
  30. ListProperty,
  31. SchemaListProperty,
  32. SchemaProperty,
  33. StringListProperty,
  34. StringProperty,
  35. )
  36. from dimagi.ext.jsonobject import JsonObject
  37. from dimagi.utils.couch import CriticalSection
  38. from dimagi.utils.couch.bulk import get_docs
  39. from dimagi.utils.couch.database import iter_docs
  40. from dimagi.utils.couch.undo import is_deleted
  41. from dimagi.utils.dates import DateSpan
  42. from dimagi.utils.modules import to_function
  43. from corehq.apps.cachehq.mixins import (
  44. CachedCouchDocumentMixin,
  45. QuickCachedDocumentMixin,
  46. )
  47. from corehq.apps.registry.helper import DataRegistryHelper
  48. from corehq.apps.userreports.app_manager.data_source_meta import (
  49. REPORT_BUILDER_DATA_SOURCE_TYPE_VALUES,
  50. )
  51. from corehq.apps.userreports.columns import get_expanded_column_config
  52. from corehq.apps.userreports.const import (
  53. ALL_EXPRESSION_TYPES,
  54. DATA_SOURCE_TYPE_AGGREGATE,
  55. DATA_SOURCE_TYPE_STANDARD,
  56. FILTER_INTERPOLATION_DOC_TYPES,
  57. UCR_NAMED_EXPRESSION,
  58. UCR_NAMED_FILTER,
  59. UCR_SQL_BACKEND,
  60. VALID_REFERENCED_DOC_TYPES,
  61. )
  62. from corehq.apps.userreports.dbaccessors import (
  63. get_all_registry_data_source_ids,
  64. get_datasources_for_domain,
  65. get_number_of_registry_report_configs_by_data_source,
  66. get_number_of_report_configs_by_data_source,
  67. get_registry_data_sources_by_domain,
  68. get_registry_report_configs_for_domain,
  69. get_report_configs_for_domain,
  70. )
  71. from corehq.apps.userreports.exceptions import (
  72. BadSpecError,
  73. DataSourceConfigurationNotFoundError,
  74. DuplicateColumnIdError,
  75. InvalidDataSourceType,
  76. ReportConfigurationNotFoundError,
  77. StaticDataSourceConfigurationNotFoundError,
  78. ValidationError,
  79. )
  80. from corehq.apps.userreports.expressions.factory import ExpressionFactory
  81. from corehq.apps.userreports.extension_points import (
  82. static_ucr_data_source_paths,
  83. static_ucr_report_paths,
  84. )
  85. from corehq.apps.userreports.filters.factory import FilterFactory
  86. from corehq.apps.userreports.indicators import CompoundIndicator
  87. from corehq.apps.userreports.indicators.factory import IndicatorFactory
  88. from corehq.apps.userreports.reports.factory import (
  89. ChartFactory,
  90. ReportColumnFactory,
  91. ReportOrderByFactory,
  92. )
  93. from corehq.apps.userreports.reports.filters.factory import ReportFilterFactory
  94. from corehq.apps.userreports.reports.filters.specs import FilterSpec
  95. from corehq.apps.userreports.specs import EvaluationContext, FactoryContext
  96. from corehq.apps.userreports.sql.util import decode_column_name
  97. from corehq.apps.userreports.util import (
  98. get_async_indicator_modify_lock_key,
  99. get_indicator_adapter,
  100. wrap_report_config_by_type,
  101. )
  102. from corehq.pillows.utils import get_deleted_doc_types
  103. from corehq.sql_db.connections import UCR_ENGINE_ID, connection_manager
  104. from corehq.util.couch import DocumentNotFound, get_document_or_not_found
  105. from corehq.util.quickcache import quickcache
  106. ID_REGEX_CHECK = re.compile(r"^[\w\-:]+$")
  107. def _check_ids(value):
  108. if not ID_REGEX_CHECK.match(value):
  109. raise BadValueError("Invalid ID: '{}'".format(value))
class DataSourceActionLog(models.Model):
    """
    Audit model that tracks changes to UCRs and their underlying tables.
    """
    # values for the ``action`` field below
    BUILD = 'build'
    MIGRATE = 'migrate'
    REBUILD = 'rebuild'
    DROP = 'drop'

    domain = models.CharField(max_length=126, null=False, db_index=True)
    # the DataSourceConfiguration couch doc id this action applies to
    indicator_config_id = models.CharField(max_length=126, null=False, db_index=True)
    # username or identifier of whoever triggered the action (optional)
    initiated_by = models.CharField(max_length=126, null=True, blank=True)
    action_source = models.CharField(max_length=126, null=True, db_index=True)
    date_created = models.DateTimeField(auto_now_add=True)
    action = models.CharField(max_length=32, choices=(
        (BUILD, _('Build')),
        (MIGRATE, _('Migrate')),
        (REBUILD, _('Rebuild')),
        (DROP, _('Drop')),
    ), db_index=True, null=False)
    # details of schema differences for migrate actions, if any
    migration_diffs = models.JSONField(null=True, blank=True)

    # True for actions that were skipped because the data source
    # was marked with ``disable_destructive_rebuild``
    skip_destructive = models.BooleanField(default=False)
class SQLColumnIndexes(DocumentSchema):
    # the column ids making up one (possibly compound) SQL index
    column_ids = StringListProperty()
class SQLPartition(DocumentSchema):
    """Uses architect library to partition

    http://architect.readthedocs.io/features/partition/index.html
    """
    # the column the table is partitioned on
    column = StringProperty()
    subtype = StringProperty(choices=['date', 'string_firstchars', 'string_lastchars'])
    constraint = StringProperty()
class SQLSettings(DocumentSchema):
    partition_config = SchemaListProperty(SQLPartition)  # no longer used
    # explicit primary key column ordering; when set it must agree with the
    # columns flagged is_primary_key (see DataSourceConfiguration.pk_columns)
    primary_key = ListProperty()
class DataSourceBuildInformation(DocumentSchema):
    """
    A class to encapsulate meta information about the process through which
    its DataSourceConfiguration was configured and built.
    """
    # Either the case type or the form xmlns that this data source is based on.
    source_id = StringProperty()
    # The app that the form belongs to, or the app that was used to infer the case properties.
    app_id = StringProperty()
    # The version of the app at the time of the data source's configuration.
    app_version = IntegerProperty()
    # The registry_slug associated with the registry of the report.
    registry_slug = StringProperty()
    # True if the data source has been built, that is, if the corresponding SQL table has been populated.
    finished = BooleanProperty(default=False)
    # Start time of the most recent build SQL table celery task.
    initiated = DateTimeProperty()
    # same as previous attributes but used for rebuilding tables in place
    finished_in_place = BooleanProperty(default=False)
    initiated_in_place = DateTimeProperty()
    # NOTE(review): presumably set when a rebuild was handed off to an
    # asynchronous process — confirm against the rebuild task code
    rebuilt_asynchronously = BooleanProperty(default=False)
class DataSourceMeta(DocumentSchema):
    # meta information about how and when this data source was built
    build = SchemaProperty(DataSourceBuildInformation)

    # If this is a linked datasource, this is the ID of the datasource this pulls from
    master_id = StringProperty()
class Validation(DocumentSchema):
    # A named filter expression used to validate documents before row
    # generation; failures surface ``error_message`` (see
    # DataSourceConfiguration.validate_document / get_all_values).
    name = StringProperty(required=True)
    expression = DictProperty(required=True)
    error_message = StringProperty(required=True)
  174. class AbstractUCRDataSource(object):
  175. """
  176. Base wrapper class for datasource-like things to be used in reports.
  177. This doesn't use abc because of this issue: https://stackoverflow.com/q/8723639/8207
  178. This is not really a "designed" interface so much as the set of methods/properties that
  179. the objects need to have in order to work with UCRs.
  180. In addition to the methods defined, the following should also exist:
  181. domain: a string
  182. engine_id: a string
  183. table_id: a string
  184. display_name: a string
  185. sql_column_indexes: a list of SQLColumnIndexes
  186. sql_settings: a SQLSettings object
  187. """
  188. @property
  189. def data_source_id(self):
  190. """
  191. The data source's ID
  192. """
  193. raise NotImplementedError()
  194. def get_columns(self):
  195. raise NotImplementedError()
  196. @property
  197. def pk_columns(self):
  198. raise NotImplementedError()
class MirroredEngineIds(DocumentSchema):
    # NOTE(review): pairs a server environment name with a set of engine ids;
    # appears related to DataSourceConfiguration.mirrored_engine_ids — the
    # consumer of this schema is not visible here, confirm usage before relying
    # on these semantics.
    server_environment = StringProperty()
    engine_ids = StringListProperty()
  202. class DataSourceConfiguration(CachedCouchDocumentMixin, Document, AbstractUCRDataSource):
  203. """
  204. A data source configuration. These map 1:1 with database tables that get created.
  205. Each data source can back an arbitrary number of reports.
  206. """
  207. domain = StringProperty(required=True)
  208. engine_id = StringProperty(default=UCR_ENGINE_ID)
  209. backend_id = StringProperty(default=UCR_SQL_BACKEND) # no longer used
  210. referenced_doc_type = StringProperty(required=True)
  211. table_id = StringProperty(required=True)
  212. display_name = StringProperty()
  213. base_item_expression = DictProperty()
  214. configured_filter = DictProperty()
  215. configured_indicators = ListProperty()
  216. named_expressions = DictProperty()
  217. named_filters = DictProperty()
  218. meta = SchemaProperty(DataSourceMeta)
  219. is_deactivated = BooleanProperty(default=False)
  220. last_modified = DateTimeProperty()
  221. asynchronous = BooleanProperty(default=False)
  222. is_available_in_analytics = BooleanProperty(default=False)
  223. sql_column_indexes = SchemaListProperty(SQLColumnIndexes)
  224. disable_destructive_rebuild = BooleanProperty(default=False)
  225. sql_settings = SchemaProperty(SQLSettings)
  226. validations = SchemaListProperty(Validation)
  227. mirrored_engine_ids = ListProperty(default=[])
  228. class Meta(object):
  229. # prevent JsonObject from auto-converting dates etc.
  230. string_conversions = ()
  231. def __str__(self):
  232. return '{} - {}'.format(self.domain, self.display_name)
  233. @property
  234. def is_deleted(self):
  235. return is_deleted(self)
  236. def save(self, **params):
  237. self.last_modified = datetime.utcnow()
  238. super(DataSourceConfiguration, self).save(**params)
  239. @property
  240. def data_source_id(self):
  241. return self._id
  242. def filter(self, document, eval_context=None):
  243. if eval_context is None:
  244. eval_context = EvaluationContext(document)
  245. filter_fn = self._get_main_filter()
  246. return filter_fn(document, eval_context)
  247. def deleted_filter(self, document):
  248. filter_fn = self._get_deleted_filter()
  249. return filter_fn and filter_fn(document, EvaluationContext(document, 0))
  250. @property
  251. def has_validations(self):
  252. return len(self.validations) > 0
  253. def validate_document(self, document, eval_context=None):
  254. if eval_context is None:
  255. eval_context = EvaluationContext(document)
  256. errors = []
  257. for validation in self._validations():
  258. if validation.validation_function(document, eval_context) is False:
  259. errors.append((validation.name, validation.error_message))
  260. if errors:
  261. raise ValidationError(errors)
  262. @memoized
  263. def _validations(self):
  264. return [
  265. _Validation(
  266. validation.name,
  267. validation.error_message,
  268. FilterFactory.from_spec(validation.expression, context=self.get_factory_context())
  269. )
  270. for validation in self.validations
  271. ]
  272. @memoized
  273. def _get_main_filter(self):
  274. return self._get_filter([self.referenced_doc_type])
  275. @memoized
  276. def _get_deleted_filter(self):
  277. return self._get_filter(get_deleted_doc_types(self.referenced_doc_type), include_configured=False)
  278. def _get_filter(self, doc_types, include_configured=True):
  279. if not doc_types:
  280. return None
  281. extras = (
  282. [self.configured_filter]
  283. if include_configured and self.configured_filter else []
  284. )
  285. built_in_filters = [
  286. self._get_domain_filter_spec(),
  287. {
  288. 'type': 'or',
  289. 'filters': [
  290. {
  291. "type": "boolean_expression",
  292. "expression": {
  293. "type": "property_name",
  294. "property_name": "doc_type",
  295. },
  296. "operator": "eq",
  297. "property_value": doc_type,
  298. }
  299. for doc_type in doc_types
  300. ],
  301. },
  302. ]
  303. return FilterFactory.from_spec(
  304. {
  305. 'type': 'and',
  306. 'filters': built_in_filters + extras,
  307. },
  308. context=self.get_factory_context(),
  309. )
  310. def _get_domain_filter_spec(self):
  311. return {
  312. "type": "boolean_expression",
  313. "expression": {
  314. "type": "property_name",
  315. "property_name": "domain",
  316. },
  317. "operator": "eq",
  318. "property_value": self.domain,
  319. }
  320. @property
  321. @memoized
  322. def named_expression_objects(self):
  323. named_expression_specs = deepcopy(self.named_expressions)
  324. named_expressions = {}
  325. spec_error = None
  326. factory_context = FactoryContext(named_expressions=named_expressions, named_filters={}, domain=self.domain)
  327. while named_expression_specs:
  328. number_generated = 0
  329. for name, expression in list(named_expression_specs.items()):
  330. try:
  331. factory_context.named_expressions = named_expressions
  332. named_expressions[name] = ExpressionFactory.from_spec(expression, factory_context)
  333. number_generated += 1
  334. del named_expression_specs[name]
  335. except BadSpecError as bad_spec_error:
  336. # maybe a nested name resolution issue, try again on the next pass
  337. spec_error = bad_spec_error
  338. if number_generated == 0 and named_expression_specs:
  339. # we unsuccessfully generated anything on this pass and there are still unresolved
  340. # references. we have to fail.
  341. assert spec_error is not None
  342. raise spec_error
  343. return named_expressions
  344. @property
  345. @memoized
  346. def named_filter_objects(self):
  347. factory_context = FactoryContext(self.named_expression_objects, {}, domain=self.domain)
  348. return {
  349. name: FilterFactory.from_spec(filter, factory_context)
  350. for name, filter in self.named_filters.items()
  351. }
  352. def get_factory_context(self):
  353. return FactoryContext(self.named_expression_objects, self.named_filter_objects, self.domain)
  354. @property
  355. @memoized
  356. def default_indicators(self):
  357. default_indicators = [IndicatorFactory.from_spec({
  358. "column_id": "doc_id",
  359. "type": "expression",
  360. "display_name": "document id",
  361. "datatype": "string",
  362. "is_nullable": False,
  363. "is_primary_key": True,
  364. "expression": {
  365. "type": "root_doc",
  366. "expression": {
  367. "type": "property_name",
  368. "property_name": "_id"
  369. }
  370. }
  371. }, self.get_factory_context())]
  372. default_indicators.append(IndicatorFactory.from_spec({
  373. "type": "inserted_at",
  374. }, self.get_factory_context()))
  375. if self.base_item_expression:
  376. default_indicators.append(IndicatorFactory.from_spec({
  377. "type": "repeat_iteration",
  378. }, self.get_factory_context()))
  379. return default_indicators
  380. @property
  381. @memoized
  382. def indicators(self):
  383. return CompoundIndicator(
  384. self.display_name,
  385. self.default_indicators + [
  386. IndicatorFactory.from_spec(indicator, self.get_factory_context())
  387. for indicator in self.configured_indicators
  388. ],
  389. None,
  390. )
  391. @property
  392. @memoized
  393. def parsed_expression(self):
  394. if self.base_item_expression:
  395. return ExpressionFactory.from_spec(self.base_item_expression, context=self.get_factory_context())
  396. return None
  397. @memoized
  398. def get_columns(self):
  399. return self.indicators.get_columns()
  400. @property
  401. @memoized
  402. def columns_by_id(self):
  403. return {c.id: c for c in self.get_columns()}
  404. def get_column_by_id(self, column_id):
  405. return self.columns_by_id.get(column_id)
  406. def get_items(self, document, eval_context=None):
  407. if self.filter(document, eval_context):
  408. if not self.base_item_expression:
  409. return [document]
  410. else:
  411. result = self.parsed_expression(document, eval_context)
  412. if result is None:
  413. return []
  414. elif isinstance(result, list):
  415. return result
  416. else:
  417. return [result]
  418. else:
  419. return []
  420. def get_all_values(self, doc, eval_context=None):
  421. if not eval_context:
  422. eval_context = EvaluationContext(doc)
  423. if self.has_validations:
  424. try:
  425. self.validate_document(doc, eval_context)
  426. except ValidationError as e:
  427. for error in e.errors:
  428. InvalidUCRData.objects.get_or_create(
  429. doc_id=doc['_id'],
  430. indicator_config_id=self._id,
  431. validation_name=error[0],
  432. defaults={
  433. 'doc_type': doc['doc_type'],
  434. 'domain': doc['domain'],
  435. 'validation_text': error[1],
  436. }
  437. )
  438. return []
  439. rows = []
  440. for item in self.get_items(doc, eval_context):
  441. values = self.indicators.get_values(item, eval_context)
  442. rows.append(values)
  443. eval_context.increment_iteration()
  444. return rows
  445. def get_report_count(self):
  446. """
  447. Return the number of ReportConfigurations that reference this data source.
  448. """
  449. return ReportConfiguration.count_by_data_source(self.domain, self._id)
  450. def validate_db_config(self):
  451. mirrored_engine_ids = self.mirrored_engine_ids
  452. if not mirrored_engine_ids:
  453. return
  454. if self.engine_id in mirrored_engine_ids:
  455. raise BadSpecError("mirrored_engine_ids list should not contain engine_id")
  456. for engine_id in mirrored_engine_ids:
  457. if not connection_manager.engine_id_is_available(engine_id):
  458. raise BadSpecError(
  459. "DB for engine_id {} is not availble".format(engine_id)
  460. )
  461. if not connection_manager.resolves_to_unique_dbs(mirrored_engine_ids + [self.engine_id]):
  462. raise BadSpecError("No two engine_ids should point to the same database")
  463. @property
  464. def data_domains(self):
  465. return [self.domain]
  466. def _verify_contains_allowed_expressions(self):
  467. """
  468. Raise BadSpecError if any disallowed expression is present in datasource
  469. """
  470. disallowed_expressions = AllowedUCRExpressionSettings.disallowed_ucr_expressions(self.domain)
  471. if 'base_item_expression' in disallowed_expressions and self.base_item_expression:
  472. raise BadSpecError(_(f'base_item_expression is not allowed for domain {self.domain}'))
  473. doubtful_keys = dict(indicators=self.configured_indicators, expressions=self.named_expressions)
  474. for expr in disallowed_expressions:
  475. results = parser.parse(f"$..[*][?type={expr}]").find(doubtful_keys)
  476. if results:
  477. raise BadSpecError(_(f'{expr} is not allowed for domain {self.domain}'))
  478. def validate(self, required=True):
  479. super(DataSourceConfiguration, self).validate(required)
  480. # these two properties implicitly call other validation
  481. self._get_main_filter()
  482. self._get_deleted_filter()
  483. # validate indicators and column uniqueness
  484. columns = [c.id for c in self.indicators.get_columns()]
  485. unique_columns = set(columns)
  486. if len(columns) != len(unique_columns):
  487. for column in set(columns):
  488. columns.remove(column)
  489. raise DuplicateColumnIdError(columns=columns)
  490. if self.referenced_doc_type not in VALID_REFERENCED_DOC_TYPES:
  491. raise BadSpecError(
  492. _('Report contains invalid referenced_doc_type: {}').format(self.referenced_doc_type))
  493. self._verify_contains_allowed_expressions()
  494. self.parsed_expression
  495. self.pk_columns
  496. @classmethod
  497. def by_domain(cls, domain):
  498. return get_datasources_for_domain(domain)
  499. @classmethod
  500. def all_ids(cls):
  501. return [res['id'] for res in cls.get_db().view('userreports/data_sources_by_build_info',
  502. reduce=False, include_docs=False)]
  503. @classmethod
  504. def all(cls):
  505. for result in iter_docs(cls.get_db(), cls.all_ids()):
  506. yield cls.wrap(result)
  507. @property
  508. def is_static(self):
  509. return id_is_static(self._id)
  510. def deactivate(self, initiated_by=None):
  511. if not self.is_static:
  512. self.is_deactivated = True
  513. self.save()
  514. get_indicator_adapter(self).drop_table(initiated_by=initiated_by, source='deactivate-data-source')
  515. def get_case_type_or_xmlns_filter(self):
  516. """Returns a list of case types or xmlns from the filter of this data source.
  517. If this can't figure out the case types or xmlns's that filter, then returns [None]
  518. Currently always returns a list because it is called by a loop in _iteratively_build_table
  519. Could be reworked to return [] to be more pythonic
  520. """
  521. if self.referenced_doc_type not in FILTER_INTERPOLATION_DOC_TYPES:
  522. return [None]
  523. property_name = FILTER_INTERPOLATION_DOC_TYPES[self.referenced_doc_type]
  524. prop_value = self._filter_interploation_helper(self.configured_filter, property_name)
  525. return prop_value or [None]
  526. def _filter_interploation_helper(self, config_filter, property_name):
  527. filter_type = config_filter.get('type')
  528. if filter_type == 'and':
  529. sub_config_filters = [
  530. self._filter_interploation_helper(f, property_name)
  531. for f in config_filter.get('filters')
  532. ]
  533. for filter_ in sub_config_filters:
  534. if filter_[0]:
  535. return filter_
  536. if filter_type != 'boolean_expression':
  537. return [None]
  538. if config_filter['operator'] not in ('eq', 'in'):
  539. return [None]
  540. expression = config_filter['expression']
  541. if not isinstance(expression, dict):
  542. return [None]
  543. if expression['type'] == 'property_name' and expression['property_name'] == property_name:
  544. prop_value = config_filter['property_value']
  545. if not isinstance(prop_value, list):
  546. prop_value = [prop_value]
  547. return prop_value
  548. return [None]
  549. @property
  550. def pk_columns(self):
  551. columns = []
  552. for col in self.get_columns():
  553. if col.is_primary_key:
  554. column_name = decode_column_name(col)
  555. columns.append(column_name)
  556. if self.sql_settings.primary_key:
  557. if set(columns) != set(self.sql_settings.primary_key):
  558. raise BadSpecError("Primary key columns must have is_primary_key set to true", self.data_source_id)
  559. columns = self.sql_settings.primary_key
  560. return columns
class RegistryDataSourceConfiguration(DataSourceConfiguration):
    """This is a special data source that can contain data from
    multiple domains. These data sources are built from
    data accessible to the domain via a Data Registry."""

    # this field indicates whether the data source is available
    # to all domains participating in the registry
    globally_accessible = BooleanProperty(default=False)
    registry_slug = StringProperty(required=True)

    @cached_property
    def registry_helper(self):
        return DataRegistryHelper(self.domain, registry_slug=self.registry_slug)

    @property
    def data_domains(self):
        # globally accessible sources pull from every participating domain;
        # otherwise only from domains visible to this one
        if self.globally_accessible:
            return self.registry_helper.participating_domains
        else:
            return self.registry_helper.visible_domains

    def validate(self, required=True):
        super().validate(required)
        # registry data sources only support case data
        if self.referenced_doc_type != 'CommCareCase':
            raise BadSpecError(
                _('Report contains invalid referenced_doc_type: {}').format(self.referenced_doc_type))

    def _get_domain_filter_spec(self):
        # unlike the base class (single-domain "eq"), match any data domain
        return {
            "type": "boolean_expression",
            "expression": {
                "type": "property_name",
                "property_name": "domain",
            },
            "operator": "in",
            "property_value": self.data_domains,
        }

    @property
    @memoized
    def default_indicators(self):
        # add a "commcare_project" column so each row can be traced back to
        # the domain the underlying document came from
        default_indicators = super().default_indicators
        default_indicators.append(IndicatorFactory.from_spec({
            "column_id": "commcare_project",
            "type": "expression",
            "display_name": "Project Space",
            "datatype": "string",
            "is_nullable": False,
            "create_index": True,
            "expression": {
                "type": "root_doc",
                "expression": {
                    "type": "property_name",
                    "property_name": "domain"
                }
            }
        }, self.get_factory_context()))
        return default_indicators

    @classmethod
    def by_domain(cls, domain):
        return get_registry_data_sources_by_domain(domain)

    @classmethod
    def all_ids(cls):
        return get_all_registry_data_source_ids()

    def get_report_count(self):
        """
        Return the number of ReportConfigurations that reference this data source.
        """
        return RegistryReportConfiguration.count_by_data_source(self.domain, self._id)
class ReportMeta(DocumentSchema):
    # `True` if this report was initially constructed by the report builder.
    created_by_builder = BooleanProperty(default=False)
    report_builder_version = StringProperty(default="")
    # `True` if this report was ever edited in the advanced JSON UIs (after June 7, 2016)
    edited_manually = BooleanProperty(default=False)
    last_modified = DateTimeProperty()
    builder_report_type = StringProperty(choices=['chart', 'list', 'table', 'worker', 'map'])
    builder_source_type = StringProperty(choices=REPORT_BUILDER_DATA_SOURCE_TYPE_VALUES)

    # If this is a linked report, this is the ID of the report this pulls from
    master_id = StringProperty()
  635. class ReportConfiguration(QuickCachedDocumentMixin, Document):
  636. """
  637. A report configuration. These map 1:1 with reports that show up in the UI.
  638. """
  639. domain = StringProperty(required=True)
  640. visible = BooleanProperty(default=True)
  641. # config_id of the datasource
  642. config_id = StringProperty(required=True)
  643. data_source_type = StringProperty(default=DATA_SOURCE_TYPE_STANDARD,
  644. choices=[DATA_SOURCE_TYPE_STANDARD, DATA_SOURCE_TYPE_AGGREGATE])
  645. title = StringProperty()
  646. description = StringProperty()
  647. aggregation_columns = StringListProperty()
  648. filters = ListProperty()
  649. columns = ListProperty()
  650. configured_charts = ListProperty()
  651. sort_expression = ListProperty()
  652. distinct_on = ListProperty()
  653. soft_rollout = DecimalProperty(default=0) # no longer used
  654. report_meta = SchemaProperty(ReportMeta)
  655. custom_query_provider = StringProperty(required=False)
  656. class Meta(object):
  657. # prevent JsonObject from auto-converting dates etc.
  658. string_conversions = ()
  659. def __str__(self):
  660. return '{} - {}'.format(self.domain, self.title)
    def save(self, *args, **kwargs):
        # stamp report_meta.last_modified on every save
        self.report_meta.last_modified = datetime.utcnow()
        super(ReportConfiguration, self).save(*args, **kwargs)
  664. @property
  665. @memoized
  666. def filters_without_prefilters(self):
  667. return [f for f in self.filters if f['type'] != 'pre']
  668. @property
  669. @memoized
  670. def prefilters(self):
  671. return [f for f in self.filters if f['type'] == 'pre']
    @property
    @memoized
    def config(self):
        """The data source configuration backing this report."""
        return get_datasource_config(self.config_id, self.domain, self.data_source_type)[0]
    @property
    @memoized
    def report_columns(self):
        """Parsed report column objects built from the raw ``columns`` specs."""
        return [ReportColumnFactory.from_spec(c, self.is_static, self.domain) for c in self.columns]
  680. @property
  681. @memoized
  682. def report_columns_by_column_id(self):
  683. return {c.column_id: c for c in self.report_columns}
    @property
    @memoized
    def ui_filters(self):
        """User-facing filter objects built from the raw ``filters`` specs."""
        return [ReportFilterFactory.from_spec(f, self) for f in self.filters]
    @property
    @memoized
    def charts(self):
        """Chart objects for this report.

        When the expanded-column toggle is enabled, multibar charts get their
        y-axis columns expanded to match the report's expanded columns; the
        configured specs are deep-copied so the stored document is untouched.
        """
        if (
            self.config_id and self.configured_charts
            and toggles.SUPPORT_EXPANDED_COLUMN_IN_REPORTS.enabled(self.domain)
        ):
            configured_charts = deepcopy(self.configured_charts)
            for chart in configured_charts:
                if chart['type'] == 'multibar':
                    chart['y_axis_columns'] = self._get_expanded_y_axis_cols_for_multibar(chart['y_axis_columns'])
            return [ChartFactory.from_spec(g._obj) for g in configured_charts]
        else:
            return [ChartFactory.from_spec(g._obj) for g in self.configured_charts]
    def _get_expanded_y_axis_cols_for_multibar(self, original_y_axis_columns):
        """Replace each 'expanded'-typed y-axis column with one entry per
        expanded sub-column; all other columns pass through unchanged."""
        y_axis_columns = []
        try:
            for y_axis_column in original_y_axis_columns:
                column_id = y_axis_column['column_id']
                column_config = self.report_columns_by_column_id[column_id]
                if column_config.type == 'expanded':
                    expanded_columns = self.get_expanded_columns(column_config)
                    for column in expanded_columns:
                        y_axis_columns.append({
                            'column_id': column.slug,
                            'display': column.header
                        })
                else:
                    y_axis_columns.append(y_axis_column)
        # catch edge cases where data source table is yet to be created
        except DataSourceConfigurationNotFoundError:
            return original_y_axis_columns
        else:
            return y_axis_columns
    def get_expanded_columns(self, column_config):
        """Return the expanded sub-columns for an 'expanded'-typed column."""
        return get_expanded_column_config(
            self.cached_data_source.config,
            column_config,
            self.cached_data_source.lang
        ).columns
    @property
    @memoized
    def cached_data_source(self):
        """The report's underlying data source object (memoized per instance)."""
        # local import — presumably avoids a circular import; confirm
        from corehq.apps.userreports.reports.data_source import ConfigurableReportDataSource
        return ConfigurableReportDataSource.from_spec(self).data_source
  733. @property
  734. @memoized
  735. def location_column_id(self):
  736. cols = [col for col in self.report_columns if col.type == 'location']
  737. if cols:
  738. return cols[0].column_id
    @property
    def map_config(self):
        """Configuration dict for the report's map view, or None (implicitly)
        when the report has no location column."""
        def map_col(column):
            # every column other than the location column becomes a labeled
            # map column; the location column itself maps to None (filtered out)
            if column['column_id'] != self.location_column_id:
                return {
                    'column_id': column['column_id'],
                    'label': column['display']
                }

        if self.location_column_id:
            return {
                'location_column_id': self.location_column_id,
                'layer_name': {
                    'XFormInstance': _('Forms'),
                    'CommCareCase': _('Cases')
                }.get(self.config.referenced_doc_type, "Layer"),
                'columns': [x for x in (map_col(col) for col in self.columns) if x]
            }
    @property
    @memoized
    def sort_order(self):
        """Parsed order-by clauses built from the raw ``sort_expression`` specs."""
        return [ReportOrderByFactory.from_spec(e) for e in self.sort_expression]
    @property
    def table_id(self):
        # delegate to the backing data source's table
        return self.config.table_id
  763. def get_ui_filter(self, filter_slug):
  764. for filter in self.ui_filters:
  765. if filter.name == filter_slug:
  766. return filter
  767. return None
  768. def get_languages(self):
  769. """
  770. Return the languages used in this report's column and filter display properties.
  771. Note that only explicitly identified languages are returned. So, if the
  772. display properties are all strings, "en" would not be returned.
  773. """
  774. langs = set()
  775. for item in self.columns + self.filters:
  776. if isinstance(item.get('display'), dict):
  777. langs |= set(item['display'].keys())
  778. return langs
    def validate(self, required=True):
        """Validate this report configuration.

        First runs the base document validation, then checks for duplicate
        filter slugs and duplicate column ids up front (the factory "chokes"
        on duplicates, per the original comment), and finally builds the
        data source and report artifacts, each of which performs its own
        validation implicitly.

        Raises BadSpecError on duplicates or any other spec problem.
        """
        from corehq.apps.userreports.reports.data_source import ConfigurableReportDataSource

        def _check_for_duplicates(supposedly_unique_list, error_msg):
            # http://stackoverflow.com/questions/9835762/find-and-list-duplicates-in-python-list
            duplicate_items = set(
                [item for item in supposedly_unique_list if supposedly_unique_list.count(item) > 1]
            )
            if len(duplicate_items) > 0:
                raise BadSpecError(
                    _(error_msg).format(', '.join(sorted(duplicate_items)))
                )

        super(ReportConfiguration, self).validate(required)
        # check duplicates before passing to factory since it chokes on them
        _check_for_duplicates(
            [FilterSpec.wrap(f).slug for f in self.filters],
            'Filters cannot contain duplicate slugs: {}',
        )
        _check_for_duplicates(
            [column_id for c in self.report_columns for column_id in c.get_column_ids()],
            'Columns cannot contain duplicate column_ids: {}',
        )

        # these calls all implicitly do validation
        ConfigurableReportDataSource.from_spec(self)
        # Property accesses below construct (and thereby validate) the
        # corresponding objects; their return values are intentionally unused.
        self.ui_filters
        self.charts
        self.sort_order
    @classmethod
    @quickcache(['cls.__name__', 'domain'])
    def by_domain(cls, domain):
        """Return the report configurations in *domain*, cached per
        (class name, domain) — see clear_caches() for invalidation."""
        return get_report_configs_for_domain(domain)
    @classmethod
    @quickcache(['cls.__name__', 'domain', 'data_source_id'])
    def count_by_data_source(cls, domain, data_source_id):
        """Return the number of report configs in *domain* backed by
        *data_source_id*, cached per (class name, domain, data source)."""
        return get_number_of_report_configs_by_data_source(domain, data_source_id)
  813. def clear_caches(self):
  814. super(ReportConfiguration, self).clear_caches()
  815. self.by_domain.clear(self.__class__, self.domain)
  816. self.count_by_data_source.clear(self.__class__, self.domain, self.config_id)
  817. @property
  818. def is_static(self):
  819. return report_config_id_is_static(self._id)
# Document-id prefixes for configurations defined in code rather than stored
# in the database; the static classes below build their doc ids with these
# (see StaticReportConfiguration.get_doc_id and
# StaticDataSourceConfiguration._datasource_id_prefix).
STATIC_PREFIX = 'static-'
CUSTOM_REPORT_PREFIX = 'custom-'
class RegistryReportConfiguration(ReportConfiguration):
    """A report configuration whose data source is a RegistryDataSourceConfiguration."""

    @classmethod
    @quickcache(['cls.__name__', 'domain'])
    def by_domain(cls, domain):
        """Return the registry report configurations in *domain*, cached
        per (class name, domain)."""
        return get_registry_report_configs_for_domain(domain)

    @classmethod
    @quickcache(['cls.__name__', 'domain', 'data_source_id'])
    def count_by_data_source(cls, domain, data_source_id):
        """Return the number of registry report configs in *domain* backed
        by *data_source_id*, cached."""
        return get_number_of_registry_report_configs_by_data_source(domain, data_source_id)

    @property
    def registry_slug(self):
        # Slug of the data registry, read from the backing data source config.
        return self.config.registry_slug

    @cached_property
    def registry_helper(self):
        # Helper wrapping registry access for this domain and registry slug.
        return DataRegistryHelper(self.domain, registry_slug=self.registry_slug)

    @property
    @memoized
    def config(self):
        """The RegistryDataSourceConfiguration backing this report.

        Raises DataSourceConfigurationNotFoundError when the referenced
        data source document does not exist.
        """
        try:
            config = get_document_or_not_found(RegistryDataSourceConfiguration, self.domain, self.config_id)
        except DocumentNotFound:
            raise DataSourceConfigurationNotFoundError(_(
                'The data source referenced by this report could not be found.'
            ))
        return config
class StaticDataSourceConfiguration(JsonObject):
    """
    For custom data sources maintained in the repository.

    This class keeps the full list of static data source configurations relevant to the
    current environment in memory and upon requests builds a new data source configuration
    from the static config.

    See 0002-keep-static-ucr-configurations-in-memory.md
    """
    _datasource_id_prefix = STATIC_PREFIX
    # Domains this static data source applies to.
    domains = ListProperty(required=True)
    # Server environments this config is active in (see _filter_by_server_env).
    server_environment = ListProperty(required=True)
    # The raw data source configuration dict; copied and completed per domain
    # in _get_datasource_config().
    config = DictProperty()
    # Per-environment engine-id mirrors; resolved in _get_datasource_config().
    mirrored_engine_ids = SchemaListProperty(MirroredEngineIds)

    @classmethod
    def get_doc_id(cls, domain, table_id):
        """Build the synthetic document id: '<prefix><domain>-<table_id>'."""
        return '{}{}-{}'.format(cls._datasource_id_prefix, domain, table_id)

    @classmethod
    @memoized
    def by_id_mapping(cls):
        """Memoized method that maps domains to static data source config"""
        return {
            cls.get_doc_id(domain, wrapped.config['table_id']): (domain, wrapped)
            for wrapped in cls._all()
            for domain in wrapped.domains
        }

    @classmethod
    def _all(cls):
        """
        :return: Generator of all wrapped configs read from disk
        """
        def __get_all():
            # Configs come from settings paths/globs on disk...
            paths = list(settings.STATIC_DATA_SOURCES)
            paths.extend(static_ucr_data_source_paths())
            for path_or_glob in paths:
                if os.path.isfile(path_or_glob):
                    yield _get_wrapped_object_from_file(path_or_glob, cls)
                else:
                    files = glob.glob(path_or_glob)
                    for path in files:
                        yield _get_wrapped_object_from_file(path, cls)

            # ...and from provider callables configured in settings.
            for provider_path in settings.STATIC_DATA_SOURCE_PROVIDERS:
                provider_fn = to_function(provider_path, failhard=True)
                for wrapped, path in provider_fn():
                    yield wrapped

        # Under unit tests all configs are returned; otherwise they are
        # filtered down to the current server environment.
        return __get_all() if settings.UNIT_TESTING else _filter_by_server_env(__get_all())

    @classmethod
    def all(cls):
        """Unoptimized method that get's all configs by re-reading from disk"""
        for wrapped in cls._all():
            for domain in wrapped.domains:
                yield cls._get_datasource_config(wrapped, domain)

    @classmethod
    def by_domain(cls, domain):
        """Return DataSourceConfiguration objects for *domain* using the
        memoized id mapping (no disk re-read)."""
        return [
            cls._get_datasource_config(wrapped, dom)
            for dom, wrapped in cls.by_id_mapping().values()
            if domain == dom
        ]

    @classmethod
    def by_id(cls, config_id):
        """Return the DataSourceConfiguration for *config_id*.

        Raises StaticDataSourceConfigurationNotFoundError for unknown ids.
        """
        try:
            domain, wrapped = cls.by_id_mapping()[config_id]
        except KeyError:
            raise StaticDataSourceConfigurationNotFoundError(_(
                'The data source %(config_id)s referenced by this report could not be found.'
            ) % {'config_id': config_id})
        return cls._get_datasource_config(wrapped, domain)

    @classmethod
    def _get_datasource_config(cls, static_config, domain):
        """Build a domain-specific DataSourceConfiguration from a static config.

        Deep-copies the raw config, fills in domain, doc id and the engine
        ids mirrored for the current server environment.
        """
        doc = deepcopy(static_config.to_json()['config'])
        doc['domain'] = domain
        doc['_id'] = cls.get_doc_id(domain, doc['table_id'])

        def _get_mirrored_engine_ids():
            # First entry matching the current environment wins; default [].
            for env in static_config.mirrored_engine_ids:
                if env.server_environment == settings.SERVER_ENVIRONMENT:
                    return env.engine_ids
            return []
        doc['mirrored_engine_ids'] = _get_mirrored_engine_ids()
        return DataSourceConfiguration.wrap(doc)
  926. class StaticReportConfiguration(JsonObject):
  927. """
  928. For statically defined reports based off of custom data sources
  929. This class keeps the full list of static report configurations relevant to the
  930. current environment in memory and upon requests builds a new report configuration
  931. from the static report config.
  932. See 0002-keep-static-ucr-configurations-in-memory.md
  933. """
  934. domains = ListProperty(required=True)
  935. report_id = StringProperty(validators=(_check_ids))
  936. data_source_table = StringProperty()
  937. config = DictProperty()
  938. custom_configurable_report = StringProperty()
  939. server_environment = ListProperty(required=True)
  940. @classmethod
  941. def get_doc_id(cls, domain, report_id, custom_configurable_report):
  942. return '{}{}-{}'.format(
  943. STATIC_PREFIX if not custom_configurable_report else CUSTOM_REPORT_PREFIX,
  944. domain,
  945. report_id,
  946. )
  947. @classmethod
  948. def _all(cls):
  949. def __get_all():
  950. paths = list(settings.STATIC_UCR_REPORTS)
  951. paths.extend(static_ucr_report_paths())
  952. for path_or_glob in paths:
  953. if os.path.isfile(path_or_glob):
  954. yield _get_wrapped_object_from_file(path_or_glob, cls)
  955. else:
  956. files = glob.glob(path_or_glob)
  957. for path in files:
  958. yield _get_wrapped_object_from_file(path, cls)
  959. filter_by_env = settings.UNIT_TESTING or settings.DEBUG
  960. return __get_all() if filter_by_env else _filter_by_server_env(__get_all())
  961. @classmethod
  962. @memoized
  963. def by_id_mapping(cls):
  964. return {
  965. cls.get_doc_id(domain, wrapped.report_id, wrapped.custom_configurable_report): (domain, wrapped)
  966. for wrapped in cls._all()
  967. for domain in wrapped.domains
  968. }
  969. @classmethod
  970. def all(cls):
  971. """Only used in tests"""
  972. for wrapped in StaticReportConfiguration._all():
  973. for domain in wrapped.domains:
  974. yield cls._get_report_config(wrapped, domain)
  975. @classmethod
  976. def by_domain(cls, domain):
  977. """
  978. Returns a list of ReportConfiguration objects, NOT StaticReportConfigurations.
  979. """
  980. return [
  981. cls._get_report_config(wrapped, dom)
  982. for dom, wrapped in cls.by_id_mapping().values()
  983. if domain == dom
  984. ]
  985. @classmethod
  986. def by_id(cls, config_id, domain):
  987. """Returns a ReportConfiguration object, NOT StaticReportConfigurations.
  988. """
  989. try:
  990. report_domain, wrapped = cls.by_id_mapping()[config_id]
  991. except KeyError:
  992. raise BadSpecError(_('The report configuration referenced by this report could '
  993. 'not be found: %(report_id)s') % {'report_id': config_id})
  994. if domain and report_domain != domain:
  995. raise DocumentNotFound("Document {} of class {} not in domain {}!".format(
  996. config_id,
  997. ReportConfiguration.__class__.__name__,
  998. domain,
  999. ))
  1000. return cls._get_report_config(wrapped, report_domain)
  1001. @classmethod
  1002. def by_ids(cls, config_ids):
  1003. mapping = cls.by_id_mapping()
  1004. config_by_ids = {}
  1005. for config_id in set(config_ids):
  1006. try:
  1007. domain, wrapped = mapping[config_id]
  1008. except KeyError:
  1009. raise ReportConfigurationNotFoundError(_(
  1010. "The following report configuration could not be found: {}".format(config_id)
  1011. ))
  1012. config_by_ids[config_id] = cls._get_report_config(wrapped, domain)
  1013. return config_by_ids
  1014. @classmethod
  1015. def report_class_by_domain_and_id(cls, domain, config_id):
  1016. try:
  1017. report_domain, wrapped = cls.by_id_mapping()[config_id]
  1018. except KeyError:
  1019. raise BadSpecError(
  1020. _('The report configuration referenced by this report could not be found.')
  1021. )
  1022. if report_domain != domain:
  1023. raise DocumentNotFound("Document {} of class {} not in domain {}!".format(
  1024. config_id,
  1025. ReportConfiguration.__class__.__name__,
  1026. domain,
  1027. ))
  1028. return wrapped.custom_configurable_report
  1029. @classmethod
  1030. def _get_report_config(cls, static_config, domain):
  1031. doc = copy(static_config.to_json()['config'])
  1032. doc['domain'] = domain
  1033. doc['_id'] = cls.get_doc_id(domain, static_config.report_id, static_config.custom_configurable_report)
  1034. doc['config_id'] = StaticDataSourceConfiguration.get_doc_id(domain, static_config.data_source_table)
  1035. return ReportConfiguration.wrap(doc)
  1036. class AsyncIndicator(models.Model):
  1037. """Indicator that has not yet been processed
  1038. These indicators will be picked up by a queue and placed into celery to be
  1039. saved. Once saved to the data sources, this record will be deleted
  1040. """
  1041. id = models.BigAutoField(primary_key=True)
  1042. doc_id = models.CharField(max_length=255, null=False, unique=True)
  1043. doc_type = models.CharField(max_length=126, null=False)
  1044. domain = models.CharField(max_length=126, null=False, db_index=True)
  1045. indicator_config_ids = ArrayField(
  1046. models.CharField(max_length=126, null=True, blank=True),
  1047. null=False
  1048. )
  1049. date_created = models.DateTimeField(auto_now_add=True, db_index=True)
  1050. date_queued = models.DateTimeField(null=True, db_index=True)
  1051. unsuccessful_attempts = models.IntegerField(default=0)
  1052. class Meta(object):
  1053. ordering = ["date_created"]
  1054. @classmethod
  1055. def update_record(cls, doc_id, doc_type, domain, config_ids):
  1056. if not isinstance(config_ids, list):
  1057. config_ids = list(config_ids)
  1058. config_ids = sorted(config_ids)
  1059. indicator, created = cls.objects.get_or_create(
  1060. doc_id=doc_id, doc_type=doc_type, domain=domain,
  1061. defaults={'indicator_config_ids': config_ids}
  1062. )
  1063. if created:
  1064. return indicator
  1065. elif set(config_ids) == indicator.indicator_config_ids:
  1066. return indicator
  1067. with CriticalSection([get_async_indicator_modify_lock_key(doc_id)]):
  1068. # Add new config ids. Need to grab indicator again in case it was
  1069. # processed since we called get_or_create
  1070. try:
  1071. indicator = cls.objects.get(doc_id=doc_id)
  1072. except cls.DoesNotExist:
  1073. indicator = AsyncIndicator.objects.create(
  1074. doc_id=doc_id,
  1075. doc_type=doc_type,
  1076. domain=domain,
  1077. indicator_config_ids=config_ids
  1078. )
  1079. else:
  1080. current_config_ids = set(indicator.indicator_config_ids)
  1081. config_ids = set(config_ids)
  1082. if config_ids - current_config_ids:
  1083. new_config_ids = sorted(list(current_config_ids.union(config_ids)))
  1084. indicator.indicator_config_ids = new_config_ids
  1085. indicator.unsuccessful_attempts = 0
  1086. indicator.save()
  1087. return indicator
  1088. @classmethod
  1089. def update_from_kafka_change(cls, change, config_ids):
  1090. return cls.update_record(
  1091. change.id, change.document['doc_type'], change.document['domain'], config_ids
  1092. )
  1093. def update_failure(self, to_remove):
  1094. self.refresh_from_db(fields=['indicator_config_ids'])
  1095. new_indicators = set(self.indicator_config_ids) - set(to_remove)
  1096. self.indicator_config_ids = sorted(list(new_indicators))
  1097. self.unsuccessful_attempts += 1
  1098. self.date_queued = None
  1099. @classmethod
  1100. def bulk_creation(cls, doc_ids, doc_type, domain, config_ids):
  1101. """Ignores the locking in update_record
  1102. Should only be used if you know the table is not otherwise being used,
  1103. and the doc ids you're supplying are not currently being used in another
  1104. asynchronous table.
  1105. For example the first build of a table, or any complete rebuilds.
  1106. If after reading the above and you're still wondering whether it's safe
  1107. to use, don't.
  1108. """
  1109. AsyncIndicator.objects.bulk_create([
  1110. AsyncIndicator(doc_id=doc_id, doc_type=doc_type, domain=domain, indicator_config_ids=config_ids)
  1111. for doc_id in doc_ids
  1112. ])
  1113. @classmethod
  1114. def bulk_update_records(cls, configs_by_docs, domain, doc_type_by_id):
  1115. # type (Dict[str, List[str]], str, Dict[str, str]) -> None
  1116. # configs_by_docs should be a dict of doc_id -> list of config_ids
  1117. if not configs_by_docs:
  1118. return
  1119. doc_ids = list(configs_by_docs.keys())
  1120. current_indicators = AsyncIndicator.objects.filter(doc_id__in=doc_ids).all()
  1121. to_update = []
  1122. for indicator in current_indicators:
  1123. new_configs = set(configs_by_docs[indicator.doc_id])
  1124. current_configs = set(indicator.indicator_config_ids)
  1125. if not new_configs.issubset(current_configs):
  1126. indicator.indicator_config_ids = sorted(current_configs.union(new_configs))
  1127. indicator.unsuccessful_attempts = 0
  1128. to_update.append(indicator)
  1129. if to_update:
  1130. bulk_update_helper(to_update)
  1131. new_doc_ids = set(doc_ids) - set([i.doc_id for i in current_indicators])
  1132. AsyncIndicator.objects.bulk_create([
  1133. AsyncIndicator(doc_id=doc_id, doc_type=doc_type_by_id[doc_id], domain=domain,
  1134. indicator_config_ids=sorted(configs_by_docs[doc_id]))
  1135. for doc_id in new_doc_ids
  1136. ])
class InvalidUCRData(models.Model):
    """A document that failed a data source validation check.

    NOTE(review): purpose inferred from the field names — confirm against
    the code that writes this table.
    """
    # id/type/domain of the offending source document.
    doc_id = models.CharField(max_length=255, null=False)
    doc_type = models.CharField(max_length=126, null=False, db_index=True)
    domain = models.CharField(max_length=126, null=False, db_index=True)
    # The data source configuration whose validation failed.
    indicator_config_id = models.CharField(max_length=126, db_index=True)
    date_created = models.DateTimeField(auto_now_add=True, db_index=True)
    # Name and message of the failed validation.
    validation_name = models.TextField()
    validation_text = models.TextField()
    notes = models.TextField(null=True)

    class Meta(object):
        unique_together = ('doc_id', 'indicator_config_id', 'validation_name')
  1148. class UCRExpressionManager(models.Manager):
  1149. def get_filters_for_domain(self, domain, context):
  1150. return {
  1151. f.name: f.wrapped_definition(context)
  1152. for f in self.filter(domain=domain, expression_type=UCR_NAMED_FILTER)
  1153. }
  1154. def get_expressions_for_domain(self,