# Source code for everest.representers.csv

"""
CSV representers.

This file is part of the everest project. 
See LICENSE.txt for licensing, CONTRIBUTORS.txt for contributor information.

Created on May 19, 2011.
"""
from __future__ import absolute_import # Makes the import below absolute
from collections import OrderedDict
from csv import Dialect
from csv import QUOTE_ALL
from csv import QUOTE_NONNUMERIC
from csv import reader
from csv import register_dialect
from csv import writer
from everest.mime import CsvMime
from everest.representers.base import RepresentationGenerator
from everest.representers.base import RepresentationParser
from everest.representers.base import ResourceRepresenter
from everest.representers.config import RepresenterConfiguration
from everest.representers.dataelements import SimpleCollectionDataElement
from everest.representers.dataelements import SimpleLinkedDataElement
from everest.representers.dataelements import SimpleMemberDataElement
from everest.representers.mapping import SimpleMappingRegistry
from everest.representers.traversal import MappingDataElementTreeTraverser
from everest.representers.traversal import ResourceDataVisitor
from everest.representers.utils import get_mapping_registry
from everest.resources.utils import get_member_class
from everest.resources.utils import is_resource_url
from everest.resources.utils import provides_member_resource
from itertools import product

__docformat__ = 'reStructuredText en'
__all__ = ['CsvCollectionDataElement',
           'CsvData',
           'CsvDataElementTreeVisitor',
           'CsvLinkedDataElement',
           'CsvMappingRegistry',
           'CsvMemberDataElement',
           'CsvRepresentationGenerator',
           'CsvRepresentationParser',
           'CsvRepresenterConfiguration',
           'CsvResourceRepresenter',
           ]


class _DefaultExportDialect(Dialect): # ignore no __init__ pylint: disable=W0232
    """
    CSV dialect applied by default when resources are exported to CSV.

    The dialect is registered with the :mod:`csv` module under the name
    ``export`` right after the class definition.
    """
    # Record and field separators.
    lineterminator = "\n"
    delimiter = ","
    skipinitialspace = False
    # Quoting: fields are enclosed in double quotes as requested by the
    # QUOTE_NONNUMERIC mode; embedded quote characters are doubled.
    quoting = QUOTE_NONNUMERIC
    quotechar = '"'
    doublequote = True


register_dialect('export', _DefaultExportDialect)


class _DefaultImportDialect(Dialect): # ignore no __init__ pylint: disable=W0232
    """
    CSV dialect applied by default when resources are imported from CSV.

    The dialect is registered with the :mod:`csv` module under the name
    ``import`` right after the class definition.
    """
    # Record and field separators.
    lineterminator = "\n"
    delimiter = ","
    skipinitialspace = False
    # Quoting: double quotes enclose fields; a doubled quote character
    # inside a field stands for a single literal quote.
    quoting = QUOTE_ALL
    quotechar = '"'
    doublequote = True


register_dialect('import', _DefaultImportDialect)


class CsvRepresentationParser(RepresentationParser):
    """
    Parser building data elements from a CSV representation.

    The first CSV row is taken as the header; each header field has to be a
    key of the attribute map of the member mapping. Every following row is
    turned into one member data element.
    """

    def run(self):
        """
        Reads all rows from the stream and converts them to data elements.

        :returns: For a collection resource class, the collection data
          element holding one member data element per data row. For a
          member resource class, the data element built from the last data
          row or `None` if there was no data row.
        :raises ValueError: If the header contains a field that is not a
          key of the attribute map or if a data row does not have the same
          number of fields as the header.
        """
        registry = get_mapping_registry(CsvMime)
        parses_member = provides_member_resource(self._resource_class)
        if not parses_member:
            # Collection resource: the parsed members are gathered in a
            # wrapping collection data element.
            member_class = get_member_class(self._resource_class)
            collection_mapping = \
                registry.find_or_create_mapping(self._resource_class)
            collection_data_el = collection_mapping.create_data_element()
            result = collection_data_el
        else:
            member_class = self._resource_class
            result = None
        member_mapping = registry.find_or_create_mapping(member_class)
        rows = reader(self._stream, self.get_option('dialect'))
        attribute_map = member_mapping.get_attribute_map()
        field_names = None
        for fields in rows:
            member_data_el = member_mapping.create_data_element()
            if field_names is None:
                # The first row is the header: validate it, then move on
                # to the data rows.
                known_names = attribute_map.keys()
                field_names = fields
                for field_name in field_names:
                    if field_name not in known_names:
                        raise ValueError('Invalid field "%s" in CSV input '
                                         'detected.' % field_name)
                continue
            found, expected = len(fields), len(field_names)
            if found != expected:
                raise ValueError("Invalid row length (found: %s, expected: "
                                 "%s)." % (found, expected))
            for field_name, raw_value in zip(field_names, fields):
                # Empty fields are passed on as None.
                value = None if raw_value == '' else raw_value
                attribute = attribute_map[field_name]
                if not is_resource_url(value):
                    # Not a resource URL: store as terminal. No conversion
                    # to a representation is needed, hence set_terminal.
                    member_data_el.set_terminal(attribute.repr_name, value)
                else:
                    link = CsvLinkedDataElement.create(value, attribute.kind)
                    member_data_el.set_nested(attribute.repr_name, link)
            if parses_member:
                result = member_data_el
            else:
                collection_data_el.add_member(member_data_el)
        return result


class CsvData(object):
    """
    Simple table of CSV output data.

    `fields` is the list of column names and `data` the list of rows, each
    row being a list of values.

    Rows and field lists taken over from another :class:`CsvData` instance
    are always copied, so building up one instance never modifies the
    instances it was built from.
    """
    def __init__(self, data=None):
        """
        :param data: Optional mapping of field name to value. A plain value
          adds a column holding that value in every row. A :class:`CsvData`
          value is merged in through :meth:`expand`; its own field names
          are used and the mapping key is ignored.
        """
        if data is None:
            data = {}
        self.fields = []
        self.data = []
        # `items` behaves like `iteritems` here and also exists on
        # Python 3 mappings.
        for attr_name, value in data.items():
            if isinstance(value, CsvData):
                self.expand(value)
                continue
            self.fields.append(attr_name)
            if self.data:
                for row in self.data:
                    row.append(value)
            else:
                self.data.append([value])

    def expand(self, other):
        """
        Adds the columns of `other` to this instance.

        If this instance has no rows yet, it takes over (copies of) the
        rows of `other`. Otherwise, every row of this instance is combined
        with every row of `other` (cartesian product).

        NOTE(review): if this instance has rows and `other` has none, the
        product is empty and all rows of this instance are dropped -
        confirm that this is intended for empty nested collections.

        :param other: :class:`CsvData` instance to merge in; not modified.
        """
        if self.data:
            self.data = [self_row + other_row
                         for self_row, other_row
                         in product(self.data, other.data)]
        else:
            # Copy the rows; later column additions to this instance must
            # not show up in `other`.
            self.data = [list(row) for row in other.data]
        self.fields = self.fields + other.fields

    def append(self, other):
        """
        Adds the rows of `other` below the rows of this instance.

        If this instance has no rows yet, the fields of `other` are taken
        over as well; otherwise the fields of this instance are kept as
        they are and the fields of `other` are not checked against them.

        :param other: :class:`CsvData` instance to append; not modified.
        """
        if self.data:
            self.data.extend(list(row) for row in other.data)
        else:
            # Copy rows and fields; rows appended to this instance later
            # on must not end up in `other`.
            self.data = [list(row) for row in other.data]
            self.fields = list(other.fields)


class CsvDataElementTreeVisitor(ResourceDataVisitor):
    """
    Visitor collecting the data of the visited member and collection nodes
    in :class:`CsvData` tables.

    The table built for the top level node (the node visited with an empty
    attribute key) is available through the `csv_data` property once the
    traversal has finished.
    """
    def __init__(self, encoding):
        """
        :param encoding: Name of the encoding used to encode unicode field
          names and values before they are stored.
        """
        ResourceDataVisitor.__init__(self)
        self.__encoding = encoding
        self.__csv_data = None

    def visit_member(self, attribute_key, attribute, member_node, member_data,
                     is_link_node, parent_data, index=None):
        """
        Builds the CSV data for a member node and stores it.

        :param attribute_key: Tuple of attribute names leading to this node;
          empty for the top level node.
        :param attribute: Attribute through which this node is referenced.
        :param member_node: The visited node; only its URL is used, and only
          for link nodes.
        :param member_data: Mapping of attribute to value for this member;
          values of nested nodes are the :class:`CsvData` objects stored
          here by earlier visits.
        :param is_link_node: If set, the member is represented by a single
          field holding its URL.
        :param parent_data: Mapping receiving the result for non-top level
          nodes.
        :param index: If not `None`, the node is a collection member and the
          result is stored in `parent_data` under this index.
        """
        if is_link_node:
            new_field_name = self.__get_field_name(attribute_key[:-1],
                                                   attribute)
            mb_data = CsvData({new_field_name:
                               self.__encode(member_node.get_url())})
        else:
            rpr_mb_data = OrderedDict()
            for attr, value in member_data.iteritems():
                new_field_name = self.__get_field_name(attribute_key, attr)
                # Unicode terminals are encoded like field names and URLs
                # since the Python 2 csv writer can not handle non-ASCII
                # unicode. Other values (including nested CsvData) pass
                # through unchanged.
                rpr_mb_data[new_field_name] = self.__encode(value)
            mb_data = CsvData(rpr_mb_data)
        if index is not None:
            # Collection member. Store in parent data with index as key.
            parent_data[index] = mb_data
        elif len(attribute_key) == 0:
            # Top level - store as CSV data.
            self.__csv_data = mb_data
        else:
            # Nested member. Store in parent data with attribute as key.
            parent_data[attribute] = mb_data

    def visit_collection(self, attribute_key, attribute, collection_node,
                         collection_data, is_link_node, parent_data):
        """
        Builds the CSV data for a collection node and stores it.

        :param attribute_key: Tuple of attribute names leading to this node;
          empty for the top level node.
        :param attribute: Attribute through which this node is referenced.
        :param collection_node: The visited node; only its URL is used, and
          only for link nodes.
        :param collection_data: Mapping of member index to the
          :class:`CsvData` stored by :meth:`visit_member`; the members are
          appended in sorted item order.
        :param is_link_node: If set, the collection is represented by a
          single field holding its URL.
        :param parent_data: Mapping receiving the result for non-top level
          nodes.
        """
        if is_link_node:
            new_field_name = self.__get_field_name(attribute_key[:-1],
                                                   attribute)
            # Encode the URL exactly like visit_member does for linked
            # members.
            coll_data = CsvData({new_field_name:
                                 self.__encode(collection_node.get_url())})
        else:
            coll_data = CsvData()
            for item in sorted(collection_data.items()):
                mb_data = item[1]
                coll_data.append(mb_data)
        if len(attribute_key) == 0:
            self.__csv_data = coll_data
        else:
            parent_data[attribute] = coll_data

    @property
    def csv_data(self):
        """
        The :class:`CsvData` built for the top level node (`None` as long as
        no top level node has been visited).
        """
        return self.__csv_data

    def __get_field_name(self, attribute_key, attribute):
        # An explicitly configured representation name (one that differs
        # from the attribute name) is used as is; otherwise the field name
        # is the dotted path built from the attribute key and the attribute
        # name.
        if attribute.name != attribute.repr_name:
            field_name = attribute.repr_name
        else:
            field_name = '.'.join(attribute_key + (attribute.name,))
        return self.__encode(field_name)

    def __encode(self, item):
        # Only unicode strings are encoded; everything else is returned
        # unchanged.
        if isinstance(item, unicode):
            item = item.encode(self.__encoding)
        return item


class CsvRepresentationGenerator(RepresentationGenerator):
    """
    A CSV writer for resource data.

    Handles linked resources and nested member and collection resources.
    """
    def run(self, data_element):
        """
        Writes the given data element to the stream as CSV.

        The data element tree is traversed with a
        :class:`CsvDataElementTreeVisitor` (configured with the `encoding`
        option); the collected field names are written as header row,
        followed by one row per collected data row. The CSV dialect is
        taken from the `dialect` option.

        :param data_element: Data element to generate the representation
          for.
        """
        trv = MappingDataElementTreeTraverser(data_element, self._mapping)
        vst = CsvDataElementTreeVisitor(self.get_option('encoding'))
        trv.run(vst)
        csv_writer = writer(self._stream, dialect=self.get_option('dialect'))
        csv_data = vst.csv_data
        csv_writer.writerow(csv_data.fields)
        csv_writer.writerows(csv_data.data)
class CsvResourceRepresenter(ResourceRepresenter):
    """
    Resource representer for the CSV MIME type.

    Creates the CSV parser and generator and configures them with the
    dialects and the encoding declared as class attributes.
    """
    content_type = CsvMime
    #: The CSV dialect to use for exporting CSV data.
    CSV_EXPORT_DIALECT = 'export'
    #: The CSV dialect to use for importing CSV data.
    CSV_IMPORT_DIALECT = 'import'
    #: The encoding to use for exporting and importing CSV data.
    # NOTE(review): in this class the encoding is only passed on to the
    # generator (export); the parser receives no encoding option.
    ENCODING = 'utf-8'

    @classmethod
    def make_mapping_registry(cls):
        """
        Returns a new :class:`CsvMappingRegistry` instance.
        """
        return CsvMappingRegistry()

    def _make_representation_parser(self, stream, resource_class, mapping):
        """
        Returns a :class:`CsvRepresentationParser` for the given arguments
        with its `dialect` option set to `CSV_IMPORT_DIALECT`.
        """
        parser = CsvRepresentationParser(stream, resource_class, mapping)
        parser.set_option('dialect', self.CSV_IMPORT_DIALECT)
        return parser

    def _make_representation_generator(self, stream, resource_class, mapping):
        """
        Returns a :class:`CsvRepresentationGenerator` for the given
        arguments with its `dialect` option set to `CSV_EXPORT_DIALECT` and
        its `encoding` option set to `ENCODING`.
        """
        generator = CsvRepresentationGenerator(stream, resource_class, mapping)
        generator.set_option('dialect', self.CSV_EXPORT_DIALECT)
        generator.set_option('encoding', self.ENCODING)
        return generator


class CsvMemberDataElement(SimpleMemberDataElement):
    """
    Member data element class for CSV; adds nothing to its base class.
    """
    pass


class CsvCollectionDataElement(SimpleCollectionDataElement):
    """
    Collection data element class for CSV; adds nothing to its base class.
    """
    pass


class CsvLinkedDataElement(SimpleLinkedDataElement):
    """
    Linked data element class for CSV; adds nothing to its base class.
    """
    pass


class CsvRepresenterConfiguration(RepresenterConfiguration):
    """
    Representer configuration class for CSV; adds nothing to its base class.
    """
    pass
class CsvMappingRegistry(SimpleMappingRegistry):
    """
    Registry for CSV mappings.
    """
    # CSV specific classes plugged into the base registry.
    member_data_element_base_class = CsvMemberDataElement
    collection_data_element_base_class = CsvCollectionDataElement
    linked_data_element_base_class = CsvLinkedDataElement
    configuration_class = CsvRepresenterConfiguration

# Project Versions