# Source code for stetl.filters.sieve

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Lets data Packets pass-through, "sieve", based on criteria in their data.
# See issue:
# A concrete example is AttrValueRecordSieve which sieves records matching
# specific attribute values. One can also think of Sieves based on XPath expressions
# (e.g. for XML, GML), or geospatial, based on for example WFS-like filters like bounding boxes.
# Author: Just van den Broecke
from stetl.component import Config
from stetl.filter import Filter
from stetl.util import Util
from stetl.packet import FORMAT

# Module-level logger, obtained from Util.get_log under the name 'Sieve'.
log = Util.get_log('Sieve')

class Sieve(Filter):
    """
    ABC for specific Sieves that pass-through, "sieve", Packets based on criteria in their data.

    Subclasses override :meth:`sieve`. :meth:`invoke` calls it only for
    Packets whose data is not None.
    """

    def __init__(self, configdict, section, consumes, produces):
        """
        Forward all arguments unchanged to ``Filter.__init__``.

        :param configdict: passed through to Filter
        :param section: passed through to Filter
        :param consumes: passed through to Filter
        :param produces: passed through to Filter
        """
        Filter.__init__(self, configdict, section, consumes, produces)

    def invoke(self, packet):
        """
        Sieve the Packet unless it carries no data.

        :param packet: incoming Packet
        :return: the Packet untouched when its data is None, otherwise
                 whatever :meth:`sieve` returns
        """
        # NOTE(review): the `packet.data` operand was missing from the copy
        # this block was restored from - confirm against the Packet class.
        if packet.data is None:
            return packet
        return self.sieve(packet)

    def sieve(self, packet):
        """
        To be implemented in subclasses.

        This base implementation returns the Packet unchanged.

        :param packet: Packet to sieve
        :return: the same Packet
        """
        return packet


class AttrValueRecordSieve(Sieve):
    """
    Sieves by attr/value(s) in Record Packets.
    """

    @Config(ptype=str, required=True)
    def attr_name(self):
        """
        Name of attribute whose value(s) are to be sieved.
        """
        pass

    @Config(ptype=list, default=list(), required=False)
    def attr_values(self):
        """
        Value(s) for attribute to be sieved. If empty any value is passed through (existence of attr_name is criterion).
        """
        pass

    def __init__(self, configdict, section):
        """
        Set up the Sieve to both consume and produce record and record_array formats.

        :param configdict: passed through to Sieve
        :param section: passed through to Sieve
        """
        Sieve.__init__(self, configdict, section,
                       consumes=[FORMAT.record_array, FORMAT.record],
                       produces=[FORMAT.record_array, FORMAT.record])

    def sieve(self, packet):
        """
        Filter out records that are not matching designated attr value(s).

        The Packet's data is replaced in place:

        * list input  -> list holding only the matching records (may be empty)
        * dict input  -> that dict when it matches, otherwise None
        * other input -> None

        :param packet: Packet whose data is a record (dict) or a list of records
        :return: the same Packet, with its data sieved
        """
        # NOTE(review): the `packet.data` references were missing from the copy
        # this block was restored from - confirm against the Packet class.

        # Start with empty result: fill with matching records
        record_data = packet.data
        packet.data = None

        # Data can be list or single record. isinstance (rather than an exact
        # type comparison) so list/dict subclasses such as OrderedDict records
        # are sieved instead of being dropped.
        if isinstance(record_data, list):
            packet.data = [record for record in record_data
                           if self.matches_attr(record)]
        elif isinstance(record_data, dict):
            if self.matches_attr(record_data):
                packet.data = record_data

        return packet

    def matches_attr(self, record):
        """
        Tell whether a single record passes the sieve.

        :param record: container supporting ``in`` and item lookup by attr_name
        :return: False when attr_name is not in the record; True when
                 attr_values is empty; otherwise whether the record's value
                 for attr_name is one of attr_values
        """
        # Attr not even in record: no use going on
        if self.attr_name not in record:
            return False

        # Match if no value
        if not self.attr_values:
            return True

        return record[self.attr_name] in self.attr_values