/corehq/apps/userreports/models.py
Python | 1361 lines | 1301 code | 29 blank | 31 comment | 7 complexity | b26c157e3f7dc2027b4d138c78514639 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1
- import glob
- import json
- import os
- import re
- from collections import namedtuple
- from copy import copy, deepcopy
- from corehq import toggles
- from datetime import datetime
- from uuid import UUID
- from django.conf import settings
- from django.contrib.postgres.fields import ArrayField
- from django.core.serializers.json import DjangoJSONEncoder
- from django.db import models
- from django.utils.functional import cached_property
- from django.utils.translation import gettext as _
- import yaml
- from couchdbkit.exceptions import BadValueError
- from django_bulk_update.helper import bulk_update as bulk_update_helper
- from jsonpath_ng.ext import parser
- from memoized import memoized
- from corehq.apps.domain.models import AllowedUCRExpressionSettings
- from dimagi.ext.couchdbkit import (
- BooleanProperty,
- DateTimeProperty,
- DecimalProperty,
- DictProperty,
- Document,
- DocumentSchema,
- IntegerProperty,
- ListProperty,
- SchemaListProperty,
- SchemaProperty,
- StringListProperty,
- StringProperty,
- )
- from dimagi.ext.jsonobject import JsonObject
- from dimagi.utils.couch import CriticalSection
- from dimagi.utils.couch.bulk import get_docs
- from dimagi.utils.couch.database import iter_docs
- from dimagi.utils.couch.undo import is_deleted
- from dimagi.utils.dates import DateSpan
- from dimagi.utils.modules import to_function
- from corehq.apps.cachehq.mixins import (
- CachedCouchDocumentMixin,
- QuickCachedDocumentMixin,
- )
- from corehq.apps.registry.helper import DataRegistryHelper
- from corehq.apps.userreports.app_manager.data_source_meta import (
- REPORT_BUILDER_DATA_SOURCE_TYPE_VALUES,
- )
- from corehq.apps.userreports.columns import get_expanded_column_config
- from corehq.apps.userreports.const import (
- ALL_EXPRESSION_TYPES,
- DATA_SOURCE_TYPE_AGGREGATE,
- DATA_SOURCE_TYPE_STANDARD,
- FILTER_INTERPOLATION_DOC_TYPES,
- UCR_NAMED_EXPRESSION,
- UCR_NAMED_FILTER,
- UCR_SQL_BACKEND,
- VALID_REFERENCED_DOC_TYPES,
- )
- from corehq.apps.userreports.dbaccessors import (
- get_all_registry_data_source_ids,
- get_datasources_for_domain,
- get_number_of_registry_report_configs_by_data_source,
- get_number_of_report_configs_by_data_source,
- get_registry_data_sources_by_domain,
- get_registry_report_configs_for_domain,
- get_report_configs_for_domain,
- )
- from corehq.apps.userreports.exceptions import (
- BadSpecError,
- DataSourceConfigurationNotFoundError,
- DuplicateColumnIdError,
- InvalidDataSourceType,
- ReportConfigurationNotFoundError,
- StaticDataSourceConfigurationNotFoundError,
- ValidationError,
- )
- from corehq.apps.userreports.expressions.factory import ExpressionFactory
- from corehq.apps.userreports.extension_points import (
- static_ucr_data_source_paths,
- static_ucr_report_paths,
- )
- from corehq.apps.userreports.filters.factory import FilterFactory
- from corehq.apps.userreports.indicators import CompoundIndicator
- from corehq.apps.userreports.indicators.factory import IndicatorFactory
- from corehq.apps.userreports.reports.factory import (
- ChartFactory,
- ReportColumnFactory,
- ReportOrderByFactory,
- )
- from corehq.apps.userreports.reports.filters.factory import ReportFilterFactory
- from corehq.apps.userreports.reports.filters.specs import FilterSpec
- from corehq.apps.userreports.specs import EvaluationContext, FactoryContext
- from corehq.apps.userreports.sql.util import decode_column_name
- from corehq.apps.userreports.util import (
- get_async_indicator_modify_lock_key,
- get_indicator_adapter,
- wrap_report_config_by_type,
- )
- from corehq.pillows.utils import get_deleted_doc_types
- from corehq.sql_db.connections import UCR_ENGINE_ID, connection_manager
- from corehq.util.couch import DocumentNotFound, get_document_or_not_found
- from corehq.util.quickcache import quickcache
# Accepts IDs made only of word characters, hyphens and colons.
ID_REGEX_CHECK = re.compile(r"^[\w\-:]+$")


def _check_ids(value):
    """Raise ``BadValueError`` unless ``value`` matches ``ID_REGEX_CHECK``."""
    if ID_REGEX_CHECK.match(value) is None:
        raise BadValueError("Invalid ID: '{}'".format(value))
class DataSourceActionLog(models.Model):
    """
    Audit model that tracks changes to UCRs and their underlying tables.
    """
    # Values accepted by the ``action`` field.
    BUILD = 'build'
    MIGRATE = 'migrate'
    REBUILD = 'rebuild'
    DROP = 'drop'

    domain = models.CharField(max_length=126, null=False, db_index=True)
    indicator_config_id = models.CharField(max_length=126, null=False, db_index=True)
    initiated_by = models.CharField(max_length=126, null=True, blank=True)
    action_source = models.CharField(max_length=126, null=True, db_index=True)
    date_created = models.DateTimeField(auto_now_add=True)
    action = models.CharField(
        max_length=32,
        choices=(
            (BUILD, _('Build')),
            (MIGRATE, _('Migrate')),
            (REBUILD, _('Rebuild')),
            (DROP, _('Drop')),
        ),
        db_index=True,
        null=False,
    )
    migration_diffs = models.JSONField(null=True, blank=True)

    # True for actions that were skipped because the data source
    # was marked with ``disable_destructive_rebuild``
    skip_destructive = models.BooleanField(default=False)
class SQLColumnIndexes(DocumentSchema):
    """Schema holding a list of column ID strings.

    NOTE(review): presumably each instance describes one SQL index over
    ``column_ids`` -- confirm against the table-building code.
    """
    column_ids = StringListProperty()
class SQLPartition(DocumentSchema):
    """Uses architect library to partition

    http://architect.readthedocs.io/features/partition/index.html
    """
    column = StringProperty()
    # Restricted to the three listed partition subtypes.
    subtype = StringProperty(
        choices=['date', 'string_firstchars', 'string_lastchars'],
    )
    constraint = StringProperty()
class SQLSettings(DocumentSchema):
    """SQL-level settings attached to a data source configuration."""
    partition_config = SchemaListProperty(SQLPartition)  # no longer used
    # Column names forming the table's primary key; when set it is checked
    # against the ``is_primary_key`` columns in
    # ``DataSourceConfiguration.pk_columns``.
    primary_key = ListProperty()
class DataSourceBuildInformation(DocumentSchema):
    """
    A class to encapsulate meta information about the process through which
    its DataSourceConfiguration was configured and built.
    """
    # Either the case type or the form xmlns that this data source is based on.
    source_id = StringProperty()

    # The app that the form belongs to, or the app that was used to infer the case properties.
    app_id = StringProperty()

    # The version of the app at the time of the data source's configuration.
    app_version = IntegerProperty()

    # The registry_slug associated with the registry of the report.
    registry_slug = StringProperty()

    # True if the data source has been built, that is, if the corresponding SQL table has been populated.
    finished = BooleanProperty(default=False)

    # Start time of the most recent build SQL table celery task.
    initiated = DateTimeProperty()

    # same as previous attributes but used for rebuilding tables in place
    finished_in_place = BooleanProperty(default=False)
    initiated_in_place = DateTimeProperty()

    rebuilt_asynchronously = BooleanProperty(default=False)
class DataSourceMeta(DocumentSchema):
    """Metadata stored under ``meta`` on a data source configuration."""
    # Information about how/when the data source was built.
    build = SchemaProperty(DataSourceBuildInformation)

    # If this is a linked datasource, this is the ID of the datasource this pulls from
    master_id = StringProperty()
class Validation(DocumentSchema):
    """A named validation rule; all three fields are required.

    ``expression`` is turned into a filter via ``FilterFactory.from_spec`` by
    ``DataSourceConfiguration._validations``; ``name`` and ``error_message``
    are reported when that filter evaluates to ``False`` for a document.
    """
    name = StringProperty(required=True)
    expression = DictProperty(required=True)
    error_message = StringProperty(required=True)
class AbstractUCRDataSource:
    """
    Base wrapper class for datasource-like things to be used in reports.

    This doesn't use abc because of this issue: https://stackoverflow.com/q/8723639/8207

    This is not really a "designed" interface so much as the set of methods/properties that
    the objects need to have in order to work with UCRs.

    In addition to the methods defined, the following should also exist:

    domain: a string
    engine_id: a string
    table_id: a string
    display_name: a string
    sql_column_indexes: a list of SQLColumnIndexes
    sql_settings: a SQLSettings object
    """

    @property
    def data_source_id(self):
        """
        The data source's ID
        """
        raise NotImplementedError

    def get_columns(self):
        """Must be overridden; the base implementation always raises."""
        raise NotImplementedError

    @property
    def pk_columns(self):
        """Must be overridden; the base implementation always raises."""
        raise NotImplementedError
class MirroredEngineIds(DocumentSchema):
    """Pairs a server environment name with a list of engine IDs.

    Used by ``StaticDataSourceConfiguration`` to pick the ``engine_ids`` whose
    ``server_environment`` equals ``settings.SERVER_ENVIRONMENT``.
    """
    server_environment = StringProperty()
    engine_ids = StringListProperty()
class DataSourceConfiguration(CachedCouchDocumentMixin, Document, AbstractUCRDataSource):
    """
    A data source configuration. These map 1:1 with database tables that get created.
    Each data source can back an arbitrary number of reports.
    """
    domain = StringProperty(required=True)
    engine_id = StringProperty(default=UCR_ENGINE_ID)
    backend_id = StringProperty(default=UCR_SQL_BACKEND)  # no longer used
    referenced_doc_type = StringProperty(required=True)
    table_id = StringProperty(required=True)
    display_name = StringProperty()
    base_item_expression = DictProperty()
    configured_filter = DictProperty()
    configured_indicators = ListProperty()
    named_expressions = DictProperty()
    named_filters = DictProperty()
    meta = SchemaProperty(DataSourceMeta)
    is_deactivated = BooleanProperty(default=False)
    last_modified = DateTimeProperty()
    asynchronous = BooleanProperty(default=False)
    is_available_in_analytics = BooleanProperty(default=False)
    sql_column_indexes = SchemaListProperty(SQLColumnIndexes)
    disable_destructive_rebuild = BooleanProperty(default=False)
    sql_settings = SchemaProperty(SQLSettings)
    validations = SchemaListProperty(Validation)
    # Declared like the other list properties instead of ``default=[]`` so that
    # no single mutable list literal is handed out as the default.
    # NOTE(review): assumes ListProperty() defaults to an empty list, as the
    # other ListProperty() fields of this class rely on.
    mirrored_engine_ids = ListProperty()

    class Meta(object):
        # prevent JsonObject from auto-converting dates etc.
        string_conversions = ()

    def __str__(self):
        return '{} - {}'.format(self.domain, self.display_name)

    @property
    def is_deleted(self):
        return is_deleted(self)

    def save(self, **params):
        """Stamp ``last_modified`` with the current UTC time, then save."""
        self.last_modified = datetime.utcnow()
        super().save(**params)

    @property
    def data_source_id(self):
        return self._id

    def filter(self, document, eval_context=None):
        """Evaluate the main filter against ``document``.

        A fresh ``EvaluationContext`` is built when none is supplied.
        """
        if eval_context is None:
            eval_context = EvaluationContext(document)

        filter_fn = self._get_main_filter()
        return filter_fn(document, eval_context)

    def deleted_filter(self, document):
        """Evaluate the deleted-doc-type filter against ``document``.

        Returns the (falsy) filter itself when there is no deleted filter.
        """
        filter_fn = self._get_deleted_filter()
        return filter_fn and filter_fn(document, EvaluationContext(document, 0))

    @property
    def has_validations(self):
        return len(self.validations) > 0

    def validate_document(self, document, eval_context=None):
        """Run every configured validation against ``document``.

        :raises ValidationError: carrying ``(name, error_message)`` tuples for
            each validation whose function returned exactly ``False``.
        """
        if eval_context is None:
            eval_context = EvaluationContext(document)

        errors = []
        for validation in self._validations():
            if validation.validation_function(document, eval_context) is False:
                errors.append((validation.name, validation.error_message))

        if errors:
            raise ValidationError(errors)

    @memoized
    def _validations(self):
        """Build ``_Validation`` objects, compiling each expression into a filter."""
        return [
            _Validation(
                validation.name,
                validation.error_message,
                FilterFactory.from_spec(validation.expression, context=self.get_factory_context())
            )
            for validation in self.validations
        ]

    @memoized
    def _get_main_filter(self):
        return self._get_filter([self.referenced_doc_type])

    @memoized
    def _get_deleted_filter(self):
        return self._get_filter(get_deleted_doc_types(self.referenced_doc_type), include_configured=False)

    def _get_filter(self, doc_types, include_configured=True):
        """Build an ``and`` filter: domain match, doc_type in ``doc_types``,
        plus ``configured_filter`` when requested and non-empty.

        Returns ``None`` when ``doc_types`` is empty.
        """
        if not doc_types:
            return None

        extras = (
            [self.configured_filter]
            if include_configured and self.configured_filter else []
        )
        built_in_filters = [
            self._get_domain_filter_spec(),
            {
                'type': 'or',
                'filters': [
                    {
                        "type": "boolean_expression",
                        "expression": {
                            "type": "property_name",
                            "property_name": "doc_type",
                        },
                        "operator": "eq",
                        "property_value": doc_type,
                    }
                    for doc_type in doc_types
                ],
            },
        ]
        return FilterFactory.from_spec(
            {
                'type': 'and',
                'filters': built_in_filters + extras,
            },
            context=self.get_factory_context(),
        )

    def _get_domain_filter_spec(self):
        """Filter spec matching documents whose ``domain`` equals ``self.domain``."""
        return {
            "type": "boolean_expression",
            "expression": {
                "type": "property_name",
                "property_name": "domain",
            },
            "operator": "eq",
            "property_value": self.domain,
        }

    @property
    @memoized
    def named_expression_objects(self):
        """Compile ``named_expressions`` into expression objects keyed by name.

        Specs are compiled in repeated passes so that an expression may refer
        to another named expression compiled in an earlier pass.

        :raises BadSpecError: the last error seen, when a whole pass compiles
            nothing while specs remain.
        """
        named_expression_specs = deepcopy(self.named_expressions)
        named_expressions = {}
        spec_error = None
        factory_context = FactoryContext(named_expressions=named_expressions, named_filters={}, domain=self.domain)
        while named_expression_specs:
            number_generated = 0
            for name, expression in list(named_expression_specs.items()):
                try:
                    factory_context.named_expressions = named_expressions
                    named_expressions[name] = ExpressionFactory.from_spec(expression, factory_context)
                    number_generated += 1
                    del named_expression_specs[name]
                except BadSpecError as bad_spec_error:
                    # maybe a nested name resolution issue, try again on the next pass
                    spec_error = bad_spec_error
            if number_generated == 0 and named_expression_specs:
                # we unsuccessfully generated anything on this pass and there are still unresolved
                # references. we have to fail.
                assert spec_error is not None
                raise spec_error
        return named_expressions

    @property
    @memoized
    def named_filter_objects(self):
        """Compile ``named_filters`` into filter objects keyed by name."""
        factory_context = FactoryContext(self.named_expression_objects, {}, domain=self.domain)
        return {
            name: FilterFactory.from_spec(filter, factory_context)
            for name, filter in self.named_filters.items()
        }

    def get_factory_context(self):
        return FactoryContext(self.named_expression_objects, self.named_filter_objects, self.domain)

    @property
    @memoized
    def default_indicators(self):
        """Indicators every data source gets: ``doc_id`` (primary key),
        ``inserted_at``, and ``repeat_iteration`` when ``base_item_expression``
        is set.
        """
        default_indicators = [IndicatorFactory.from_spec({
            "column_id": "doc_id",
            "type": "expression",
            "display_name": "document id",
            "datatype": "string",
            "is_nullable": False,
            "is_primary_key": True,
            "expression": {
                "type": "root_doc",
                "expression": {
                    "type": "property_name",
                    "property_name": "_id"
                }
            }
        }, self.get_factory_context())]

        default_indicators.append(IndicatorFactory.from_spec({
            "type": "inserted_at",
        }, self.get_factory_context()))

        if self.base_item_expression:
            default_indicators.append(IndicatorFactory.from_spec({
                "type": "repeat_iteration",
            }, self.get_factory_context()))

        return default_indicators

    @property
    @memoized
    def indicators(self):
        """``CompoundIndicator`` of the default plus configured indicators."""
        return CompoundIndicator(
            self.display_name,
            self.default_indicators + [
                IndicatorFactory.from_spec(indicator, self.get_factory_context())
                for indicator in self.configured_indicators
            ],
            None,
        )

    @property
    @memoized
    def parsed_expression(self):
        """Compiled ``base_item_expression``, or ``None`` when it is empty."""
        if self.base_item_expression:
            return ExpressionFactory.from_spec(self.base_item_expression, context=self.get_factory_context())
        return None

    @memoized
    def get_columns(self):
        return self.indicators.get_columns()

    @property
    @memoized
    def columns_by_id(self):
        return {c.id: c for c in self.get_columns()}

    def get_column_by_id(self, column_id):
        """Return the column with ``column_id`` or ``None`` when unknown."""
        return self.columns_by_id.get(column_id)

    def get_items(self, document, eval_context=None):
        """Return the list of items ``document`` contributes.

        ``[]`` when the filter rejects the document; ``[document]`` when there
        is no ``base_item_expression``; otherwise the base item expression's
        result coerced to a list (``None`` becomes ``[]``).
        """
        if self.filter(document, eval_context):
            if not self.base_item_expression:
                return [document]
            else:
                result = self.parsed_expression(document, eval_context)
                if result is None:
                    return []
                elif isinstance(result, list):
                    return result
                else:
                    return [result]
        else:
            return []

    def get_all_values(self, doc, eval_context=None):
        """Return the indicator values for every item of ``doc``.

        When validations are configured and fail, an ``InvalidUCRData`` record
        is created (if missing) per failed validation and ``[]`` is returned.
        """
        if not eval_context:
            eval_context = EvaluationContext(doc)

        if self.has_validations:
            try:
                self.validate_document(doc, eval_context)
            except ValidationError as e:
                for error in e.errors:
                    InvalidUCRData.objects.get_or_create(
                        doc_id=doc['_id'],
                        indicator_config_id=self._id,
                        validation_name=error[0],
                        defaults={
                            'doc_type': doc['doc_type'],
                            'domain': doc['domain'],
                            'validation_text': error[1],
                        }
                    )
                return []

        rows = []
        for item in self.get_items(doc, eval_context):
            values = self.indicators.get_values(item, eval_context)
            rows.append(values)
            eval_context.increment_iteration()

        return rows

    def get_report_count(self):
        """
        Return the number of ReportConfigurations that reference this data source.
        """
        return ReportConfiguration.count_by_data_source(self.domain, self._id)

    def validate_db_config(self):
        """Check ``mirrored_engine_ids`` for consistency; no-op when empty.

        :raises BadSpecError: when the list contains ``engine_id``, names an
            unavailable engine, or two engine IDs resolve to the same database.
        """
        mirrored_engine_ids = self.mirrored_engine_ids
        if not mirrored_engine_ids:
            return
        if self.engine_id in mirrored_engine_ids:
            raise BadSpecError("mirrored_engine_ids list should not contain engine_id")

        for engine_id in mirrored_engine_ids:
            if not connection_manager.engine_id_is_available(engine_id):
                raise BadSpecError(
                    "DB for engine_id {} is not availble".format(engine_id)
                )

        if not connection_manager.resolves_to_unique_dbs(mirrored_engine_ids + [self.engine_id]):
            raise BadSpecError("No two engine_ids should point to the same database")

    @property
    def data_domains(self):
        return [self.domain]

    def _verify_contains_allowed_expressions(self):
        """
        Raise BadSpecError if any disallowed expression is present in datasource
        """
        disallowed_expressions = AllowedUCRExpressionSettings.disallowed_ucr_expressions(self.domain)
        # Translate the constant template first and interpolate afterwards:
        # passing an already-interpolated f-string to gettext can never match
        # a catalogue entry.
        if 'base_item_expression' in disallowed_expressions and self.base_item_expression:
            raise BadSpecError(
                _('base_item_expression is not allowed for domain {}').format(self.domain))
        doubtful_keys = dict(indicators=self.configured_indicators, expressions=self.named_expressions)
        for expr in disallowed_expressions:
            results = parser.parse(f"$..[*][?type={expr}]").find(doubtful_keys)
            if results:
                raise BadSpecError(
                    _('{} is not allowed for domain {}').format(expr, self.domain))

    def validate(self, required=True):
        """Validate the document and everything derived from it.

        :raises DuplicateColumnIdError: when two columns share an id; the
            error carries the surplus occurrences.
        :raises BadSpecError: for an invalid ``referenced_doc_type`` or a
            disallowed expression.
        """
        super().validate(required)
        # these two properties implicitly call other validation
        self._get_main_filter()
        self._get_deleted_filter()

        # validate indicators and column uniqueness
        columns = [c.id for c in self.indicators.get_columns()]
        unique_columns = set(columns)
        if len(columns) != len(unique_columns):
            # drop one occurrence of each id so only the duplicates remain
            for column in unique_columns:
                columns.remove(column)
            raise DuplicateColumnIdError(columns=columns)

        if self.referenced_doc_type not in VALID_REFERENCED_DOC_TYPES:
            raise BadSpecError(
                _('Report contains invalid referenced_doc_type: {}').format(self.referenced_doc_type))

        self._verify_contains_allowed_expressions()
        # accessed only for their validation side effects
        self.parsed_expression
        self.pk_columns

    @classmethod
    def by_domain(cls, domain):
        return get_datasources_for_domain(domain)

    @classmethod
    def all_ids(cls):
        return [res['id'] for res in cls.get_db().view('userreports/data_sources_by_build_info',
                                                       reduce=False, include_docs=False)]

    @classmethod
    def all(cls):
        """Yield every data source configuration, wrapped as ``cls``."""
        for result in iter_docs(cls.get_db(), cls.all_ids()):
            yield cls.wrap(result)

    @property
    def is_static(self):
        return id_is_static(self._id)

    def deactivate(self, initiated_by=None):
        """Mark a non-static data source as deactivated, save it and drop its table."""
        if not self.is_static:
            self.is_deactivated = True
            self.save()
            get_indicator_adapter(self).drop_table(initiated_by=initiated_by, source='deactivate-data-source')

    def get_case_type_or_xmlns_filter(self):
        """Returns a list of case types or xmlns from the filter of this data source.

        If this can't figure out the case types or xmlns's that filter, then returns [None]
        Currently always returns a list because it is called by a loop in _iteratively_build_table
        Could be reworked to return [] to be more pythonic
        """
        if self.referenced_doc_type not in FILTER_INTERPOLATION_DOC_TYPES:
            return [None]

        property_name = FILTER_INTERPOLATION_DOC_TYPES[self.referenced_doc_type]
        prop_value = self._filter_interploation_helper(self.configured_filter, property_name)

        return prop_value or [None]

    def _filter_interploation_helper(self, config_filter, property_name):
        """Extract the values ``property_name`` is compared against in ``config_filter``.

        Handles ``boolean_expression`` filters using ``eq``/``in`` on a
        ``property_name`` expression, and ``and`` filters (the first sub-filter
        yielding a truthy first value wins).

        Always returns a non-empty list; ``[None]`` means "could not tell".
        """
        filter_type = config_filter.get('type')
        if filter_type == 'and':
            # tolerate a missing/empty 'filters' key
            for sub_filter in config_filter.get('filters') or []:
                values = self._filter_interploation_helper(sub_filter, property_name)
                if values and values[0]:
                    return values
            return [None]

        if filter_type != 'boolean_expression':
            return [None]

        if config_filter.get('operator') not in ('eq', 'in'):
            return [None]

        expression = config_filter['expression']

        if not isinstance(expression, dict):
            return [None]

        if expression.get('type') == 'property_name' and expression.get('property_name') == property_name:
            prop_value = config_filter['property_value']
            if not isinstance(prop_value, list):
                prop_value = [prop_value]
            # an empty "in" list carries no usable value
            return prop_value or [None]
        return [None]

    @property
    def pk_columns(self):
        """Names of the primary key columns.

        When ``sql_settings.primary_key`` is set it must name exactly the
        columns flagged ``is_primary_key`` and is returned as-is.

        :raises BadSpecError: when the two sets differ.
        """
        columns = []
        for col in self.get_columns():
            if col.is_primary_key:
                column_name = decode_column_name(col)
                columns.append(column_name)
        if self.sql_settings.primary_key:
            if set(columns) != set(self.sql_settings.primary_key):
                raise BadSpecError("Primary key columns must have is_primary_key set to true", self.data_source_id)
            columns = self.sql_settings.primary_key
        return columns
class RegistryDataSourceConfiguration(DataSourceConfiguration):
    """This is a special data source that can contain data from
    multiple domains. These data sources are built from
    data accessible to the domain via a Data Registry."""

    # this field indicates whether the data source is available
    # to all domains participating in the registry
    globally_accessible = BooleanProperty(default=False)
    registry_slug = StringProperty(required=True)

    @cached_property
    def registry_helper(self):
        return DataRegistryHelper(self.domain, registry_slug=self.registry_slug)

    @property
    def data_domains(self):
        """Domains whose data feeds this data source.

        The registry's participating domains when ``globally_accessible``,
        otherwise its visible domains.
        """
        if self.globally_accessible:
            return self.registry_helper.participating_domains
        else:
            return self.registry_helper.visible_domains

    def validate(self, required=True):
        """Run the parent validation, then require ``CommCareCase`` documents.

        :raises BadSpecError: when ``referenced_doc_type`` is anything else.
        """
        super().validate(required)
        if self.referenced_doc_type != 'CommCareCase':
            raise BadSpecError(
                _('Report contains invalid referenced_doc_type: {}').format(self.referenced_doc_type))

    def _get_domain_filter_spec(self):
        """Filter spec matching documents whose ``domain`` is in ``data_domains``."""
        return {
            "type": "boolean_expression",
            "expression": {
                "type": "property_name",
                "property_name": "domain",
            },
            "operator": "in",
            "property_value": self.data_domains,
        }

    @property
    @memoized
    def default_indicators(self):
        """Parent default indicators plus a ``commcare_project`` column
        holding the root document's ``domain``.
        """
        inherited = super().default_indicators
        project_indicator = IndicatorFactory.from_spec({
            "column_id": "commcare_project",
            "type": "expression",
            "display_name": "Project Space",
            "datatype": "string",
            "is_nullable": False,
            "create_index": True,
            "expression": {
                "type": "root_doc",
                "expression": {
                    "type": "property_name",
                    "property_name": "domain"
                }
            }
        }, self.get_factory_context())
        # Build a new list rather than appending: the parent property is
        # memoized, so appending would modify its cached list in place.
        return inherited + [project_indicator]

    @classmethod
    def by_domain(cls, domain):
        return get_registry_data_sources_by_domain(domain)

    @classmethod
    def all_ids(cls):
        return get_all_registry_data_source_ids()

    def get_report_count(self):
        """
        Return the number of RegistryReportConfigurations that reference this data source.
        """
        return RegistryReportConfiguration.count_by_data_source(self.domain, self._id)
class ReportMeta(DocumentSchema):
    """Metadata stored under ``report_meta`` on a report configuration."""
    # `True` if this report was initially constructed by the report builder.
    created_by_builder = BooleanProperty(default=False)
    report_builder_version = StringProperty(default="")

    # `True` if this report was ever edited in the advanced JSON UIs (after June 7, 2016)
    edited_manually = BooleanProperty(default=False)

    # Set to the current UTC time by ``ReportConfiguration.save``.
    last_modified = DateTimeProperty()

    builder_report_type = StringProperty(
        choices=['chart', 'list', 'table', 'worker', 'map'],
    )
    builder_source_type = StringProperty(
        choices=REPORT_BUILDER_DATA_SOURCE_TYPE_VALUES,
    )

    # If this is a linked report, this is the ID of the report this pulls from
    master_id = StringProperty()
class ReportConfiguration(QuickCachedDocumentMixin, Document):
    """
    A report configuration. These map 1:1 with reports that show up in the UI.
    """
    domain = StringProperty(required=True)
    visible = BooleanProperty(default=True)
    # config_id of the datasource
    config_id = StringProperty(required=True)
    data_source_type = StringProperty(default=DATA_SOURCE_TYPE_STANDARD,
                                      choices=[DATA_SOURCE_TYPE_STANDARD, DATA_SOURCE_TYPE_AGGREGATE])
    title = StringProperty()
    description = StringProperty()
    aggregation_columns = StringListProperty()
    filters = ListProperty()
    columns = ListProperty()
    configured_charts = ListProperty()
    sort_expression = ListProperty()
    distinct_on = ListProperty()
    soft_rollout = DecimalProperty(default=0)  # no longer used
    report_meta = SchemaProperty(ReportMeta)
    custom_query_provider = StringProperty(required=False)

    class Meta(object):
        # prevent JsonObject from auto-converting dates etc.
        string_conversions = ()

    def __str__(self):
        return '{} - {}'.format(self.domain, self.title)

    def save(self, *args, **kwargs):
        """Stamp ``report_meta.last_modified`` with the current UTC time, then save."""
        self.report_meta.last_modified = datetime.utcnow()
        super().save(*args, **kwargs)

    @property
    @memoized
    def filters_without_prefilters(self):
        """Filter specs whose ``type`` is not ``'pre'``."""
        return [f for f in self.filters if f['type'] != 'pre']

    @property
    @memoized
    def prefilters(self):
        """Filter specs whose ``type`` is ``'pre'``."""
        return [f for f in self.filters if f['type'] == 'pre']

    @property
    @memoized
    def config(self):
        """The data source configuration referenced by ``config_id``."""
        return get_datasource_config(self.config_id, self.domain, self.data_source_type)[0]

    @property
    @memoized
    def report_columns(self):
        return [ReportColumnFactory.from_spec(c, self.is_static, self.domain) for c in self.columns]

    @property
    @memoized
    def report_columns_by_column_id(self):
        return {c.column_id: c for c in self.report_columns}

    @property
    @memoized
    def ui_filters(self):
        return [ReportFilterFactory.from_spec(f, self) for f in self.filters]

    @property
    @memoized
    def charts(self):
        """Chart objects built from ``configured_charts``.

        When the ``SUPPORT_EXPANDED_COLUMN_IN_REPORTS`` toggle is enabled for
        the domain, multibar charts get their ``y_axis_columns`` expanded on a
        deep copy of the configuration, leaving the stored spec untouched.
        """
        if (
            self.config_id and self.configured_charts
            and toggles.SUPPORT_EXPANDED_COLUMN_IN_REPORTS.enabled(self.domain)
        ):
            configured_charts = deepcopy(self.configured_charts)
            for chart in configured_charts:
                if chart['type'] == 'multibar':
                    chart['y_axis_columns'] = self._get_expanded_y_axis_cols_for_multibar(chart['y_axis_columns'])
            return [ChartFactory.from_spec(g._obj) for g in configured_charts]
        else:
            return [ChartFactory.from_spec(g._obj) for g in self.configured_charts]

    def _get_expanded_y_axis_cols_for_multibar(self, original_y_axis_columns):
        """Replace each ``expanded`` y-axis column with its expanded columns.

        Falls back to ``original_y_axis_columns`` when the data source
        configuration cannot be found.
        """
        y_axis_columns = []
        try:
            for y_axis_column in original_y_axis_columns:
                column_id = y_axis_column['column_id']
                column_config = self.report_columns_by_column_id[column_id]
                if column_config.type == 'expanded':
                    expanded_columns = self.get_expanded_columns(column_config)
                    for column in expanded_columns:
                        y_axis_columns.append({
                            'column_id': column.slug,
                            'display': column.header
                        })
                else:
                    y_axis_columns.append(y_axis_column)
        # catch edge cases where data source table is yet to be created
        except DataSourceConfigurationNotFoundError:
            return original_y_axis_columns
        else:
            return y_axis_columns

    def get_expanded_columns(self, column_config):
        return get_expanded_column_config(
            self.cached_data_source.config,
            column_config,
            self.cached_data_source.lang
        ).columns

    @property
    @memoized
    def cached_data_source(self):
        # imported here rather than at module level
        # NOTE(review): presumably to avoid a circular import -- confirm
        from corehq.apps.userreports.reports.data_source import ConfigurableReportDataSource
        return ConfigurableReportDataSource.from_spec(self).data_source

    @property
    @memoized
    def location_column_id(self):
        """``column_id`` of the first ``location`` column, or ``None``."""
        cols = [col for col in self.report_columns if col.type == 'location']
        if cols:
            return cols[0].column_id

    @property
    def map_config(self):
        """Map configuration dict, or ``None`` when there is no location column.

        ``columns`` lists every configured column except the location column.
        """
        def map_col(column):
            if column['column_id'] != self.location_column_id:
                return {
                    'column_id': column['column_id'],
                    'label': column['display']
                }

        if self.location_column_id:
            return {
                'location_column_id': self.location_column_id,
                'layer_name': {
                    'XFormInstance': _('Forms'),
                    'CommCareCase': _('Cases')
                }.get(self.config.referenced_doc_type, "Layer"),
                'columns': [x for x in (map_col(col) for col in self.columns) if x]
            }

    @property
    @memoized
    def sort_order(self):
        return [ReportOrderByFactory.from_spec(e) for e in self.sort_expression]

    @property
    def table_id(self):
        return self.config.table_id

    def get_ui_filter(self, filter_slug):
        """Return the UI filter named ``filter_slug`` or ``None``."""
        for ui_filter in self.ui_filters:
            if ui_filter.name == filter_slug:
                return ui_filter
        return None

    def get_languages(self):
        """
        Return the languages used in this report's column and filter display properties.
        Note that only explicitly identified languages are returned. So, if the
        display properties are all strings, "en" would not be returned.
        """
        langs = set()
        for item in self.columns + self.filters:
            if isinstance(item.get('display'), dict):
                langs |= set(item['display'].keys())
        return langs

    def validate(self, required=True):
        """Validate the document plus its filters, columns, charts and sort order.

        :raises BadSpecError: when filter slugs or column ids are duplicated.
        """
        from collections import Counter

        from corehq.apps.userreports.reports.data_source import ConfigurableReportDataSource

        def _check_for_duplicates(supposedly_unique_list, error_msg):
            # single counting pass instead of list.count() per item
            counts = Counter(supposedly_unique_list)
            duplicate_items = {item for item, count in counts.items() if count > 1}
            if duplicate_items:
                raise BadSpecError(
                    _(error_msg).format(', '.join(sorted(duplicate_items)))
                )

        super().validate(required)

        # check duplicates before passing to factory since it chokes on them
        _check_for_duplicates(
            [FilterSpec.wrap(f).slug for f in self.filters],
            'Filters cannot contain duplicate slugs: {}',
        )
        _check_for_duplicates(
            [column_id for c in self.report_columns for column_id in c.get_column_ids()],
            'Columns cannot contain duplicate column_ids: {}',
        )

        # these calls all implicitly do validation
        ConfigurableReportDataSource.from_spec(self)
        self.ui_filters
        self.charts
        self.sort_order

    @classmethod
    @quickcache(['cls.__name__', 'domain'])
    def by_domain(cls, domain):
        return get_report_configs_for_domain(domain)

    @classmethod
    @quickcache(['cls.__name__', 'domain', 'data_source_id'])
    def count_by_data_source(cls, domain, data_source_id):
        return get_number_of_report_configs_by_data_source(domain, data_source_id)

    def clear_caches(self):
        """Clear the parent caches and this report's quickcache entries."""
        super().clear_caches()
        self.by_domain.clear(self.__class__, self.domain)
        self.count_by_data_source.clear(self.__class__, self.domain, self.config_id)

    @property
    def is_static(self):
        return report_config_id_is_static(self._id)
# ID prefix used for static data source document IDs
# (see ``StaticDataSourceConfiguration._datasource_id_prefix``).
STATIC_PREFIX = 'static-'
# NOTE(review): presumably the ID prefix of custom reports -- usage is not
# visible in this part of the file.
CUSTOM_REPORT_PREFIX = 'custom-'
class RegistryReportConfiguration(ReportConfiguration):
    """Report configuration backed by a ``RegistryDataSourceConfiguration``."""

    @classmethod
    @quickcache(['cls.__name__', 'domain'])
    def by_domain(cls, domain):
        return get_registry_report_configs_for_domain(domain)

    @classmethod
    @quickcache(['cls.__name__', 'domain', 'data_source_id'])
    def count_by_data_source(cls, domain, data_source_id):
        return get_number_of_registry_report_configs_by_data_source(domain, data_source_id)

    @property
    def registry_slug(self):
        """The registry slug of the underlying data source."""
        return self.config.registry_slug

    @cached_property
    def registry_helper(self):
        return DataRegistryHelper(self.domain, registry_slug=self.registry_slug)

    @property
    @memoized
    def config(self):
        """The registry data source referenced by ``config_id``.

        :raises DataSourceConfigurationNotFoundError: when the document
            cannot be found.
        """
        try:
            return get_document_or_not_found(
                RegistryDataSourceConfiguration, self.domain, self.config_id
            )
        except DocumentNotFound:
            raise DataSourceConfigurationNotFoundError(_(
                'The data source referenced by this report could not be found.'
            ))
class StaticDataSourceConfiguration(JsonObject):
    """
    For custom data sources maintained in the repository.

    This class keeps the full list of static data source configurations relevant to the
    current environment in memory and upon requests builds a new data source configuration
    from the static config.

    See 0002-keep-static-ucr-configurations-in-memory.md
    """
    _datasource_id_prefix = STATIC_PREFIX
    domains = ListProperty(required=True)
    server_environment = ListProperty(required=True)
    config = DictProperty()
    mirrored_engine_ids = SchemaListProperty(MirroredEngineIds)

    @classmethod
    def get_doc_id(cls, domain, table_id):
        """Build the document ID: ``<prefix><domain>-<table_id>``."""
        return '{}{}-{}'.format(cls._datasource_id_prefix, domain, table_id)

    @classmethod
    @memoized
    def by_id_mapping(cls):
        """Memoized mapping of doc ID to ``(domain, wrapped static config)``"""
        mapping = {}
        for wrapped in cls._all():
            for domain in wrapped.domains:
                doc_id = cls.get_doc_id(domain, wrapped.config['table_id'])
                mapping[doc_id] = (domain, wrapped)
        return mapping

    @classmethod
    def _all(cls):
        """
        :return: Generator of all wrapped configs read from disk
        """
        def _iter_wrapped():
            source_paths = list(settings.STATIC_DATA_SOURCES)
            source_paths.extend(static_ucr_data_source_paths())
            for path_or_glob in source_paths:
                # an existing file is read directly, anything else is
                # treated as a glob pattern
                if os.path.isfile(path_or_glob):
                    matches = [path_or_glob]
                else:
                    matches = glob.glob(path_or_glob)
                for path in matches:
                    yield _get_wrapped_object_from_file(path, cls)

            for provider_path in settings.STATIC_DATA_SOURCE_PROVIDERS:
                provider_fn = to_function(provider_path, failhard=True)
                for wrapped, _path in provider_fn():
                    yield wrapped

        wrapped_configs = _iter_wrapped()
        if settings.UNIT_TESTING:
            # no server-environment filtering under unit tests
            return wrapped_configs
        return _filter_by_server_env(wrapped_configs)

    @classmethod
    def all(cls):
        """Unoptimized method that gets all configs by re-reading from disk"""
        for wrapped in cls._all():
            for domain in wrapped.domains:
                yield cls._get_datasource_config(wrapped, domain)

    @classmethod
    def by_domain(cls, domain):
        """Data source configurations of every static config listing ``domain``."""
        configs = []
        for dom, wrapped in cls.by_id_mapping().values():
            if domain == dom:
                configs.append(cls._get_datasource_config(wrapped, dom))
        return configs

    @classmethod
    def by_id(cls, config_id):
        """Return the data source configuration for ``config_id``.

        :raises StaticDataSourceConfigurationNotFoundError: for an unknown ID.
        """
        try:
            domain, wrapped = cls.by_id_mapping()[config_id]
        except KeyError:
            raise StaticDataSourceConfigurationNotFoundError(_(
                'The data source %(config_id)s referenced by this report could not be found.'
            ) % {'config_id': config_id})

        return cls._get_datasource_config(wrapped, domain)

    @classmethod
    def _get_datasource_config(cls, static_config, domain):
        """Build a ``DataSourceConfiguration`` for ``domain`` from ``static_config``.

        Works on a deep copy of the static ``config`` JSON, then sets
        ``domain``, ``_id`` and the ``mirrored_engine_ids`` of the first entry
        matching ``settings.SERVER_ENVIRONMENT`` (``[]`` when none matches).
        """
        doc = deepcopy(static_config.to_json()['config'])
        doc['domain'] = domain
        doc['_id'] = cls.get_doc_id(domain, doc['table_id'])
        doc['mirrored_engine_ids'] = next(
            (
                env.engine_ids
                for env in static_config.mirrored_engine_ids
                if env.server_environment == settings.SERVER_ENVIRONMENT
            ),
            [],
        )
        return DataSourceConfiguration.wrap(doc)
class StaticReportConfiguration(JsonObject):
    """
    For statically defined reports based off of custom data sources

    This class keeps the full list of static report configurations relevant to the
    current environment in memory and upon requests builds a new report configuration
    from the static report config.

    See 0002-keep-static-ucr-configurations-in-memory.md
    """
    domains = ListProperty(required=True)
    report_id = StringProperty(validators=(_check_ids))
    data_source_table = StringProperty()
    config = DictProperty()
    custom_configurable_report = StringProperty()
    server_environment = ListProperty(required=True)

    @classmethod
    def get_doc_id(cls, domain, report_id, custom_configurable_report):
        """
        Build the document id for ``report_id`` in ``domain``.

        The id is prefixed with ``CUSTOM_REPORT_PREFIX`` when
        ``custom_configurable_report`` is truthy, otherwise with ``STATIC_PREFIX``.
        """
        return '{}{}-{}'.format(
            STATIC_PREFIX if not custom_configurable_report else CUSTOM_REPORT_PREFIX,
            domain,
            report_id,
        )

    @classmethod
    def _all(cls):
        """
        :return: Iterable of the wrapped report configs read from disk. The
            result is passed through ``_filter_by_server_env`` unless running
            unit tests or with ``settings.DEBUG``.
        """
        def _read_configs():
            paths = list(settings.STATIC_UCR_REPORTS)
            paths.extend(static_ucr_report_paths())
            for path_or_glob in paths:
                if os.path.isfile(path_or_glob):
                    yield _get_wrapped_object_from_file(path_or_glob, cls)
                else:
                    for path in glob.glob(path_or_glob):
                        yield _get_wrapped_object_from_file(path, cls)

        # When this flag is set the server-environment filter is NOT applied
        skip_env_filter = settings.UNIT_TESTING or settings.DEBUG
        if skip_env_filter:
            return _read_configs()
        return _filter_by_server_env(_read_configs())

    @classmethod
    @memoized
    def by_id_mapping(cls):
        """Memoized mapping of doc id -> ``(domain, static report config)``."""
        return {
            cls.get_doc_id(domain, wrapped.report_id, wrapped.custom_configurable_report): (domain, wrapped)
            for wrapped in cls._all()
            for domain in wrapped.domains
        }

    @classmethod
    def all(cls):
        """Only used in tests"""
        for wrapped in StaticReportConfiguration._all():
            for domain in wrapped.domains:
                yield cls._get_report_config(wrapped, domain)

    @classmethod
    def by_domain(cls, domain):
        """
        Returns a list of ReportConfiguration objects, NOT StaticReportConfigurations.
        """
        return [
            cls._get_report_config(wrapped, dom)
            for dom, wrapped in cls.by_id_mapping().values()
            if domain == dom
        ]

    @classmethod
    def by_id(cls, config_id, domain):
        """Returns a ReportConfiguration object, NOT StaticReportConfigurations.

        :param config_id: doc id as produced by ``get_doc_id``
        :param domain: when truthy, the report must belong to this domain
        :raises BadSpecError: if ``config_id`` is not a known static report
        :raises DocumentNotFound: if ``domain`` is given and the report belongs
            to a different domain
        """
        try:
            report_domain, wrapped = cls.by_id_mapping()[config_id]
        except KeyError:
            raise BadSpecError(_('The report configuration referenced by this report could '
                                 'not be found: %(report_id)s') % {'report_id': config_id})
        if domain and report_domain != domain:
            raise DocumentNotFound("Document {} of class {} not in domain {}!".format(
                config_id,
                # ``__name__`` of the class itself; ``__class__.__name__`` would
                # report the metaclass name instead
                ReportConfiguration.__name__,
                domain,
            ))
        return cls._get_report_config(wrapped, report_domain)

    @classmethod
    def by_ids(cls, config_ids):
        """
        Return a dict of config id -> ReportConfiguration for ``config_ids``.

        :raises ReportConfigurationNotFoundError: if any id is not a known
            static report
        """
        mapping = cls.by_id_mapping()
        config_by_ids = {}
        for config_id in set(config_ids):
            try:
                domain, wrapped = mapping[config_id]
            except KeyError:
                # Interpolate AFTER translating so the lookup uses the
                # untouched message template
                raise ReportConfigurationNotFoundError(_(
                    "The following report configuration could not be found: {}"
                ).format(config_id))
            config_by_ids[config_id] = cls._get_report_config(wrapped, domain)
        return config_by_ids

    @classmethod
    def report_class_by_domain_and_id(cls, domain, config_id):
        """
        Return the ``custom_configurable_report`` value of the static report
        ``config_id``.

        :raises BadSpecError: if ``config_id`` is not a known static report
        :raises DocumentNotFound: if the report belongs to a domain other than
            ``domain``
        """
        try:
            report_domain, wrapped = cls.by_id_mapping()[config_id]
        except KeyError:
            raise BadSpecError(
                _('The report configuration referenced by this report could not be found.')
            )
        if report_domain != domain:
            raise DocumentNotFound("Document {} of class {} not in domain {}!".format(
                config_id,
                ReportConfiguration.__name__,
                domain,
            ))
        return wrapped.custom_configurable_report

    @classmethod
    def _get_report_config(cls, static_config, domain):
        """Build a ``ReportConfiguration`` for ``domain`` from ``static_config``."""
        doc = copy(static_config.to_json()['config'])
        doc['domain'] = domain
        doc['_id'] = cls.get_doc_id(domain, static_config.report_id, static_config.custom_configurable_report)
        doc['config_id'] = StaticDataSourceConfiguration.get_doc_id(domain, static_config.data_source_table)
        return ReportConfiguration.wrap(doc)
class AsyncIndicator(models.Model):
    """Indicator that has not yet been processed

    These indicators will be picked up by a queue and placed into celery to be
    saved. Once saved to the data sources, this record will be deleted
    """
    id = models.BigAutoField(primary_key=True)
    doc_id = models.CharField(max_length=255, null=False, unique=True)
    doc_type = models.CharField(max_length=126, null=False)
    domain = models.CharField(max_length=126, null=False, db_index=True)
    indicator_config_ids = ArrayField(
        models.CharField(max_length=126, null=True, blank=True),
        null=False
    )
    date_created = models.DateTimeField(auto_now_add=True, db_index=True)
    date_queued = models.DateTimeField(null=True, db_index=True)
    unsuccessful_attempts = models.IntegerField(default=0)

    class Meta(object):
        ordering = ["date_created"]

    @classmethod
    def update_record(cls, doc_id, doc_type, domain, config_ids):
        """
        Create the record for ``doc_id`` or add ``config_ids`` to the existing one.

        :param config_ids: iterable of indicator config ids
        :return: the created or updated ``AsyncIndicator``
        """
        # sorted() accepts any iterable and always returns a new list
        config_ids = sorted(config_ids)

        indicator, created = cls.objects.get_or_create(
            doc_id=doc_id, doc_type=doc_type, domain=domain,
            defaults={'indicator_config_ids': config_ids}
        )

        # Compare as sets: the stored value is a list, so comparing a set to
        # it directly is never equal and this shortcut would never be taken.
        if created or set(config_ids) == set(indicator.indicator_config_ids):
            return indicator

        with CriticalSection([get_async_indicator_modify_lock_key(doc_id)]):
            # Add new config ids. Need to grab indicator again in case it was
            # processed since we called get_or_create
            try:
                indicator = cls.objects.get(doc_id=doc_id)
            except cls.DoesNotExist:
                indicator = cls.objects.create(
                    doc_id=doc_id,
                    doc_type=doc_type,
                    domain=domain,
                    indicator_config_ids=config_ids
                )
            else:
                current_config_ids = set(indicator.indicator_config_ids)
                requested_config_ids = set(config_ids)
                if requested_config_ids - current_config_ids:
                    indicator.indicator_config_ids = sorted(
                        current_config_ids | requested_config_ids
                    )
                    indicator.unsuccessful_attempts = 0
                    indicator.save()

        return indicator

    @classmethod
    def update_from_kafka_change(cls, change, config_ids):
        """Call ``update_record`` using the id, doc_type and domain of ``change``."""
        return cls.update_record(
            change.id, change.document['doc_type'], change.document['domain'], config_ids
        )

    def update_failure(self, to_remove):
        """
        Record a failed attempt on this instance.

        Reloads ``indicator_config_ids`` from the database, drops the ids in
        ``to_remove``, increments ``unsuccessful_attempts`` and clears
        ``date_queued``. This method does not call ``save()``.
        """
        self.refresh_from_db(fields=['indicator_config_ids'])
        remaining = set(self.indicator_config_ids) - set(to_remove)
        self.indicator_config_ids = sorted(remaining)
        self.unsuccessful_attempts += 1
        self.date_queued = None

    @classmethod
    def bulk_creation(cls, doc_ids, doc_type, domain, config_ids):
        """Ignores the locking in update_record

        Should only be used if you know the table is not otherwise being used,
        and the doc ids you're supplying are not currently being used in another
        asynchronous table.

        For example the first build of a table, or any complete rebuilds.

        If after reading the above and you're still wondering whether it's safe
        to use, don't.
        """
        AsyncIndicator.objects.bulk_create([
            AsyncIndicator(doc_id=doc_id, doc_type=doc_type, domain=domain, indicator_config_ids=config_ids)
            for doc_id in doc_ids
        ])

    @classmethod
    def bulk_update_records(cls, configs_by_docs, domain, doc_type_by_id):
        """
        Add config ids to existing records and create records for new docs.

        :param configs_by_docs: dict of doc_id -> list of config_ids
        :param domain: domain assigned to newly created records
        :param doc_type_by_id: dict of doc_id -> doc_type, used for newly
            created records
        """
        if not configs_by_docs:
            return

        doc_ids = list(configs_by_docs)

        # Materialise once; the rows are iterated twice below
        current_indicators = list(AsyncIndicator.objects.filter(doc_id__in=doc_ids))
        to_update = []

        for indicator in current_indicators:
            new_configs = set(configs_by_docs[indicator.doc_id])
            current_configs = set(indicator.indicator_config_ids)
            if not new_configs.issubset(current_configs):
                indicator.indicator_config_ids = sorted(current_configs | new_configs)
                indicator.unsuccessful_attempts = 0
                to_update.append(indicator)

        if to_update:
            bulk_update_helper(to_update)

        new_doc_ids = set(doc_ids) - {indicator.doc_id for indicator in current_indicators}
        AsyncIndicator.objects.bulk_create([
            AsyncIndicator(doc_id=doc_id, doc_type=doc_type_by_id[doc_id], domain=domain,
                           indicator_config_ids=sorted(configs_by_docs[doc_id]))
            for doc_id in new_doc_ids
        ])
class InvalidUCRData(models.Model):
    """
    One row per (doc_id, indicator_config_id, validation_name) combination.

    NOTE(review): presumably each row records a validation failure for a
    document against an indicator config - confirm against the callers.
    """
    doc_id = models.CharField(max_length=255, null=False)
    doc_type = models.CharField(max_length=126, null=False, db_index=True)
    domain = models.CharField(max_length=126, null=False, db_index=True)
    indicator_config_id = models.CharField(max_length=126, db_index=True)
    # Populated automatically when the row is first created
    date_created = models.DateTimeField(auto_now_add=True, db_index=True)
    validation_name = models.TextField()
    validation_text = models.TextField()
    # Optional free text; the only nullable column on this model
    notes = models.TextField(null=True)

    class Meta:
        unique_together = ('doc_id', 'indicator_config_id', 'validation_name')
- class UCRExpressionManager(models.Manager):
def get_filters_for_domain(self, domain, context):
    """Return a dict of name -> ``wrapped_definition(context)`` for the rows
    of ``domain`` whose expression type is ``UCR_NAMED_FILTER``."""
    named_filters = self.filter(domain=domain, expression_type=UCR_NAMED_FILTER)
    filters_by_name = {}
    for named_filter in named_filters:
        filters_by_name[named_filter.name] = named_filter.wrapped_definition(context)
    return filters_by_name
- def get_expressions_for_domain(self,