attribute_filter.rb | searchcode

/lib/new_relic/agent/attribute_filter.rb

http://github.com/newrelic/rpm
Ruby | 301 lines | 174 code | 44 blank | 83 comment | 28 complexity | d555bf5a2a9ace5083fe273d90a3debb MD5 | raw file
Possible License(s): Apache-2.0

# encoding: utf-8
# This file is distributed under New Relic's license terms.
# See https://github.com/newrelic/newrelic-ruby-agent/blob/main/LICENSE for complete details.

# This class applies filtering rules as specified in the Agent Attributes
# cross-agent spec.
#
# Instances of it are constructed by deriving a set of rules from the agent
# configuration. Instances are immutable once they are constructed - if the
# config changes, a new instance should be constructed and swapped in to
# replace the existing one.
#
# The #apply method is the main external interface of this class. It takes an
# attribute name and a set of default destinations (represented as a bitfield)
# and returns a set of actual destinations after applying the filtering rules
# specified in the config.
#
# Each set of destinations is represented as a bitfield, where the bit positions
# specified in the DST_* constants are used to indicate whether an attribute
# should be sent to the corresponding destination.
#
# The choice of a bitfield here rather than an Array was made to avoid the need
# for any transient object allocations during the application of rules. Since
# rule application will happen once per attribute per transaction, this is a hot
# code path.
#
# The algorithm for applying filtering rules is as follows:
#
# 1. Start with a bitfield representing the set of default destinations passed
#    in to #apply.
# 2. Mask this bitfield against the set of destinations that have attributes
#    enabled at all.
# 3. Traverse the list of rules in order (more on the ordering later), applying
#    each matching rule, but taking care to not let rules override the enabled
#    status of each destination. Each matching rule may mutate the bitfield.
# 4. Return the resulting bitfield after all rules have been applied.
#
# Each rule consists of a name, a flag indicating whether it ends with a
# wildcard, a bitfield representing the set of destinations that it applies to,
# and a flag specifying whether it is an include or exclude rule.
#
# During construction, rules are sorted according to the following criteria:
#
# 1. First, the names are compared lexicographically. This has the effect of
#    forcing shorter (more general) rules towards the top of the list and longer
#    (more specific) rules towards the bottom. This is important, because the
#    Agent Attributes spec stipulates that the most specific rule for a given
#    destination should take precedence. Since rules are applied top-to-bottom,
#    this sorting guarantees that the most specific rule will be applied last.
# 2. If the names are identical, we next examine the wildcard flag. Rules ending
#    with a wildcard are considered more general (and thus 'less than') rules
#    not ending with a wildcard.
# 3. If the names and wildcard flags are identical, we next examine whether the
#    rules being compared are include or exclude rules. Exclude rules have
#    precedence by the spec, so they are considered 'greater than' include
#    rules.
#
# This approach to rule evaluation was taken from the PHP agent's
# implementation.
#

module NewRelic
  module Agent
    # Decides, per attribute name, which destinations an attribute may be sent
    # to. Destinations are represented as an Integer bitfield built from the
    # DST_* constants. The rule list is derived from the config in #initialize
    # and sorted once; #apply and #allows_key? then evaluate it.
    class AttributeFilter
      DST_NONE = 0x0

      DST_TRANSACTION_EVENTS   = 1 << 0
      DST_TRANSACTION_TRACER   = 1 << 1
      DST_ERROR_COLLECTOR      = 1 << 2
      DST_BROWSER_MONITORING   = 1 << 3
      DST_SPAN_EVENTS          = 1 << 4
      DST_TRANSACTION_SEGMENTS = 1 << 5

      DST_ALL                  = 0x3f

      attr_reader :rules

      # Builds the filter from a config object that is read with
      # config[:'some.key'].
      #
      # Include/exclude list values that are unset (nil) are treated as empty
      # lists rather than raising.
      def initialize(config)
        @enabled_destinations = DST_NONE

        @enabled_destinations |= DST_TRANSACTION_TRACER   if config[:'transaction_tracer.attributes.enabled']
        @enabled_destinations |= DST_TRANSACTION_EVENTS   if config[:'transaction_events.attributes.enabled']
        @enabled_destinations |= DST_ERROR_COLLECTOR      if config[:'error_collector.attributes.enabled']
        @enabled_destinations |= DST_BROWSER_MONITORING   if config[:'browser_monitoring.attributes.enabled']
        @enabled_destinations |= DST_SPAN_EVENTS          if config[:'span_events.attributes.enabled']
        @enabled_destinations |= DST_TRANSACTION_SEGMENTS if config[:'transaction_segments.attributes.enabled']

        # The global switch overrides every per-destination switch.
        @enabled_destinations = DST_NONE unless config[:'attributes.enabled']

        @rules = []

        build_rule(config[:'attributes.exclude'], DST_ALL, false)
        build_rule(config[:'transaction_tracer.attributes.exclude'],   DST_TRANSACTION_TRACER,   false)
        build_rule(config[:'transaction_events.attributes.exclude'],   DST_TRANSACTION_EVENTS,   false)
        build_rule(config[:'error_collector.attributes.exclude'],      DST_ERROR_COLLECTOR,      false)
        build_rule(config[:'browser_monitoring.attributes.exclude'],   DST_BROWSER_MONITORING,   false)
        build_rule(config[:'span_events.attributes.exclude'],          DST_SPAN_EVENTS,          false)
        build_rule(config[:'transaction_segments.attributes.exclude'], DST_TRANSACTION_SEGMENTS, false)

        # Parameter/argument capture switches become include rules; when a
        # switch is off the rule has no destinations and build_rule drops it.
        build_rule(['request.parameters.*'], include_destinations_for_capture_params(config[:capture_params]), true)
        build_rule(['job.resque.args.*'],    include_destinations_for_capture_params(config[:'resque.capture_params']), true)
        build_rule(['job.sidekiq.args.*'],   include_destinations_for_capture_params(config[:'sidekiq.capture_params']), true)

        # Here the config value itself selects include (truthy) vs. exclude.
        build_rule(['host', 'port_path_or_id'], DST_TRANSACTION_SEGMENTS, config[:'datastore_tracer.instance_reporting.enabled'])
        build_rule(['database_name'],           DST_TRANSACTION_SEGMENTS, config[:'datastore_tracer.database_name_reporting.enabled'])

        build_rule(config[:'attributes.include'], DST_ALL, true)
        build_rule(config[:'transaction_tracer.attributes.include'],   DST_TRANSACTION_TRACER,   true)
        build_rule(config[:'transaction_events.attributes.include'],   DST_TRANSACTION_EVENTS,   true)
        build_rule(config[:'error_collector.attributes.include'],      DST_ERROR_COLLECTOR,      true)
        build_rule(config[:'browser_monitoring.attributes.include'],   DST_BROWSER_MONITORING,   true)
        build_rule(config[:'span_events.attributes.include'],          DST_SPAN_EVENTS,          true)
        build_rule(config[:'transaction_segments.attributes.include'], DST_TRANSACTION_SEGMENTS, true)

        build_uri_rule(config[:'attributes.exclude'])

        @rules.sort!

        # We're ok to cache high security for fast lookup because the attribute
        # filter is re-generated on any significant config change.
        @high_security = config[:high_security]

        setup_key_cache
        cache_prefix_denylist
      end

      # Note the key cache belongs to this filter instance (it is not a
      # process-wide global), is accessible by multiple threads, and is
      # intentionally left unsynchronized for liveness. Writes will always
      # involve writing the same boolean value for each key, so there is no
      # worry of one value clobbering another. For reads, if a value hasn't been
      # written to the cache yet, the worst that will happen is that it will run
      # through the filter rules again. Both reads and writes will become
      # eventually consistent.
      #
      # The cache is keyed first by destination bitfield, then by attribute key.
      def setup_key_cache
        destinations = [
          DST_TRANSACTION_EVENTS,
          DST_TRANSACTION_TRACER,
          DST_ERROR_COLLECTOR,
          DST_BROWSER_MONITORING,
          DST_SPAN_EVENTS,
          DST_TRANSACTION_SEGMENTS,
          DST_ALL
        ]

        @key_cache = destinations.each_with_object({}) do |destination, cache|
          cache[destination] = {}
        end
      end

      # Returns the destinations targeted by a capture-params style include
      # rule: the transaction tracer and error collector when +capturing+ is
      # truthy, otherwise DST_NONE.
      def include_destinations_for_capture_params(capturing)
        if capturing
          DST_TRANSACTION_TRACER | DST_ERROR_COLLECTOR
        else
          DST_NONE
        end
      end

      # Appends one rule per attribute name to @rules, skipping rules that
      # report themselves as empty.
      #
      # attribute_names - list of names (a trailing "*" marks a wildcard); nil
      #                   is accepted and treated as an empty list.
      # destinations    - bitfield of destinations the rules apply to.
      # is_include      - truthy for include rules, falsy for exclude rules.
      def build_rule(attribute_names, destinations, is_include)
        Array(attribute_names).each do |attribute_name|
          rule = AttributeFilterRule.new(attribute_name, destinations, is_include)
          @rules << rule unless rule.empty?
        end
      end

      # If any one of the URI attribute aliases is excluded by exact name,
      # exclude all of the remaining aliases from every destination as well.
      # A nil list is treated as empty.
      def build_uri_rule(excluded_attributes)
        excluded    = Array(excluded_attributes)
        uri_aliases = %w(uri url request_uri request.uri http.url)

        return if (excluded & uri_aliases).empty?

        build_rule(uri_aliases - excluded, DST_ALL, false)
      end

      # Runs every matching rule, in sorted order, against the given default
      # destinations and returns the resulting bitfield.
      #
      # attribute_name       - converted with #to_s before matching.
      # default_destinations - bitfield to start from.
      #
      # Returns DST_NONE immediately when no destination is enabled. The final
      # mask against the enabled destinations ensures an include rule can never
      # turn on a destination that is disabled.
      def apply(attribute_name, default_destinations)
        return DST_NONE if @enabled_destinations == DST_NONE

        destinations   = default_destinations
        attribute_name = attribute_name.to_s

        @rules.each do |rule|
          next unless rule.match?(attribute_name)

          if rule.is_include
            destinations |= rule.destinations
          else
            # Exclude rules store the complement of their destinations, so
            # AND-ing clears exactly the excluded bits.
            destinations &= rule.destinations
          end
        end

        destinations & @enabled_destinations
      end

      # True when every bit of +requested_destination+ is set in
      # +allowed_destinations+.
      def allows?(allowed_destinations, requested_destination)
        allowed_destinations & requested_destination == requested_destination
      end

      # Cached check of whether +key+ may be sent to +destination+ when
      # +destination+ is also used as the default destination set.
      #
      # Returns false without consulting the cache if any requested destination
      # bit is disabled. Destinations that were not pre-seeded by
      # #setup_key_cache (for example a combination of several DST_* bits) get
      # their cache table created on first use instead of raising.
      def allows_key?(key, destination)
        return false unless destination & @enabled_destinations == destination

        cache  = (@key_cache[destination] ||= {})
        cached = cache[key]

        # false is a valid cached answer, so only nil means "not cached yet".
        return cached unless cached.nil?

        cache[key] = allows?(apply(key, destination), destination)
      end

      # Returns the :high_security config value captured at construction.
      def high_security?
        @high_security
      end

      # For attribute prefixes where we know the default destinations will
      # always be DST_NONE, we can statically determine that any attribute
      # starting with the prefix will not be allowed unless there's an include
      # rule that might match attributes starting with it.
      #
      # This allows us to skip significant preprocessing work (hash/array
      # flattening and type coercion) for HTTP request parameters and job
      # arguments for Sidekiq and Resque in the common case, since none of
      # these attributes are captured by default.
      #
      def cache_prefix_denylist
        @prefix_denylist = {}
        @prefix_denylist[:'request.parameters'] = true unless might_allow_prefix_uncached?(:'request.parameters')
        @prefix_denylist[:'job.sidekiq.args']   = true unless might_allow_prefix_uncached?(:'job.sidekiq.args')
        @prefix_denylist[:'job.resque.args']    = true unless might_allow_prefix_uncached?(:'job.resque.args')
      end

      # Note that the given prefix *must* be a Symbol
      #
      # Prefixes that were never evaluated by #cache_prefix_denylist are
      # reported as possibly allowed.
      def might_allow_prefix?(prefix)
        !@prefix_denylist.include?(prefix)
      end

      # Scans the include rules and returns true if any of them could match an
      # attribute whose name starts with +prefix+:
      #
      # * a wildcard rule matches if one of (rule name, prefix) is a prefix of
      #   the other;
      # * an exact rule matches if its name starts with the prefix.
      #
      # Exclude rules are ignored.
      def might_allow_prefix_uncached?(prefix)
        prefix = prefix.to_s
        @rules.any? do |rule|
          if rule.is_include
            if rule.wildcard
              if rule.attribute_name.size > prefix.size
                rule.attribute_name.start_with?(prefix)
              else
                prefix.start_with?(rule.attribute_name)
              end
            else
              rule.attribute_name.start_with?(prefix)
            end
          end
        end
      end
    end

    # A single include or exclude rule used by AttributeFilter.
    #
    # attribute_name - the rule's name with one trailing "*" removed.
    # wildcard       - true when the original name ended with "*", in which
    #                  case the rule matches any name starting with
    #                  attribute_name; otherwise it matches that exact name.
    # is_include     - the flag passed to the constructor (used by truthiness).
    # destinations   - for include rules, the bitfield given; for exclude rules,
    #                  its bitwise complement, so it can be AND-ed directly
    #                  into a destination set to clear the excluded bits.
    class AttributeFilterRule
      attr_reader :attribute_name, :destinations, :is_include, :wildcard

      def initialize(attribute_name, destinations, is_include)
        @wildcard       = attribute_name.end_with?("*")
        # chomp removes the "*" only when it is the very last character, which
        # keeps the stored name consistent with the wildcard flag above (a
        # `$`-anchored regexp would also strip a star sitting before a newline).
        @attribute_name = attribute_name.chomp("*")
        @is_include     = is_include
        @destinations   = is_include ? destinations : ~destinations
      end

      # Rules are sorted from least specific to most specific
      #
      # All else being the same, wildcards are considered less specific
      # All else being the same, include rules are less specific than excludes
      def <=>(other)
        name_cmp = @attribute_name <=> other.attribute_name
        return name_cmp unless name_cmp == 0

        if wildcard != other.wildcard
          return wildcard ? -1 : 1
        end

        # is_include is used by truthiness everywhere else, so compare it the
        # same way here; nil and false both mean "exclude" and must tie.
        mine   = is_include ? true : false
        theirs = other.is_include ? true : false
        return 0 if mine == theirs

        mine ? -1 : 1
      end

      # True when this rule applies to the attribute called +name+ (a String).
      def match?(name)
        if wildcard
          name.start_with?(@attribute_name)
        else
          @attribute_name == name
        end
      end

      # True when applying the rule can never change a destination bitfield:
      # an include rule that adds no destination, or an exclude rule that
      # removes none.
      def empty?
        # Exclude rules hold a complemented (negative) Integer, so restrict the
        # comparison to the known destination bits.
        relevant = @destinations & AttributeFilter::DST_ALL

        if is_include
          relevant == AttributeFilter::DST_NONE
        else
          relevant == AttributeFilter::DST_ALL
        end
      end
    end
  end
end