Ticket #2002: 0001-ticket-2002-support-subdataset-levels-crs-filtering-.patch

File 0001-ticket-2002-support-subdataset-levels-crs-filtering-.patch, 13.2 KB (added by Dimitar Misev, 5 years ago)
  • applications/wcst_import/recipes_custom/sentinel2/recipe.py

    From 220033de28112882eab07d0b89839388a3872e19 Mon Sep 17 00:00:00 2001
    From: Dimitar Misev <misev@rasdaman.com>
    Date: Mon, 25 Feb 2019 08:37:29 +0100
    Subject: [PATCH] ticket:2002 - support subdataset / levels / crs filtering in
     sentinel2 recipe
    
    ---
     .../recipes_custom/sentinel2/recipe.py        | 70 +++++++++++---
     applications/wcst_import/session.py           |  8 ++
     doc/main/05_geo-services-guide.rst            | 93 ++++++++++++++++++-
     3 files changed, 152 insertions(+), 19 deletions(-)
    
    diff --git a/applications/wcst_import/recipes_custom/sentinel2/recipe.py b/applications/wcst_import/recipes_custom/sentinel2/recipe.py
    index 46e5dea8..64627d2e 100644
    a b from master.evaluator.evaluator_slice_factory import EvaluatorSliceFactory  
    2626from master.importer.importer import Importer
    2727from master.importer.multi_importer import MultiImporter
    2828from master.error.runtime_exception import RuntimeException
     29from master.error.validate_exception import RecipeValidationException
    2930from master.evaluator.sentence_evaluator import SentenceEvaluator
    3031from master.evaluator.expression_evaluator_factory import ExpressionEvaluatorFactory
    3132from master.helper.user_band import UserBand
    class Recipe(GeneralCoverageRecipe):  
    4849    # supported product levels
    4950    LVL_L1C = 'L1C'
    5051    LVL_L2A = 'L2A'
     52    LEVELS = [LVL_L1C, LVL_L2A]
     53
    5154    # resolutions in a single Sentinel 2 dataset; TCI (True Color Image) is 10m
    5255    RES_10m = '10m'
    5356    RES_20m = '20m'
    5457    RES_60m = '60m'
    5558    RES_TCI = 'TCI'
    56 
    57     RES_DICT = {RES_10m: [1, 10, -10], RES_20m: [1, 20, -20], RES_60m: [1, 60, -60], RES_TCI: [1, 10, -10]}
     59    # resolution (subdataset name) -> actual resolution numbers
     60    RES_DICT = {RES_10m: [1, 10, -10],
     61                RES_20m: [1, 20, -20],
     62                RES_60m: [1, 60, -60],
     63                RES_TCI: [1, 10, -10]}
     64    # list of subdatasets to import
     65    SUBDATASETS = [RES_10m, RES_20m, RES_60m, RES_TCI]
    5866
    5967    # variables that can be used to template the coverage id
    6068    VAR_CRS_CODE = '${crsCode}'
    class Recipe(GeneralCoverageRecipe):  
    98106        RES_TCI: BANDS_L1C[RES_TCI],
    99107    }
    100108    BANDS = { LVL_L1C: BANDS_L1C, LVL_L2A: BANDS_L2A }
    101     # number of subdatasets in a Sentinel 2 dataset
    102     SUBDATASETS = 4
    103109    DEFAULT_CRS = "OGC/0/AnsiDate@EPSG/0/${crsCode}"
    104110    DEFAULT_IMPORT_ORDER = GdalToCoverageConverter.IMPORT_ORDER_ASCENDING
    105111   
    class Recipe(GeneralCoverageRecipe):  
    110116    def __init__(self, session):
    111117        super(Recipe, self).__init__(session)
    112118        self._init_options()
    113         # subdatasets have a specific path scheme and prepending "file://" interferes with it
    114         # TODO: however uncommenting the below causes another error:
    115         #       The URL provided in the coverageRef parameter is malformed.
    116         # ConfigManager.root_url = ""
    117119
    118120    def validate(self):
    119121        super(Recipe, self).validate()
     122        if len(self.resolutions) == 0:
     123            raise RecipeValidationException("No resolutions to import provided.")
     124        for res in self.resolutions:
     125            if res not in self.SUBDATASETS:
     126                raise RecipeValidationException("Invalid resolution '" + str(res) +
     127                    "' provided, expected a subset of " + str(self.SUBDATASETS))
     128        for lvl in self.levels:
     129            if lvl not in self.LEVELS:
     130                raise RecipeValidationException("Invalid level '" + str(lvl) +
     131                    "' provided, expected a subset of " + str(self.LEVELS))
    120132
    121133    def describe(self):
    122134        log.info("The recipe has been validated and is ready to run.")
    class Recipe(GeneralCoverageRecipe):  
    152164   
    153165    def _init_options(self):
    154166        self._init_coverage_options()
     167        self._init_input_options()
    155168        self.coverage_id = self.session.get_coverage_id()
    156169        self.import_order = self._set_option(self.options, 'import_order', self.DEFAULT_IMPORT_ORDER)
    157170        self.wms_import = self._set_option(self.options, 'wms_import', False)
    class Recipe(GeneralCoverageRecipe):  
    163176        self.crs = self._set_option(covopts, 'crs', self.DEFAULT_CRS)
    164177        self._set_option(covopts, 'slicer', {})
    165178        self._init_slicer_options(covopts)
     179
     180    def _init_input_options(self):
     181        # specify a subset of resolutions to ingest
     182        inputopts = self.session.get_input()
     183        self.resolutions = self._set_option(inputopts, 'resolutions', None)
     184        if self.resolutions is None:
     185            self.resolutions = self._set_option(inputopts, 'subdatasets', None)
     186        if self.resolutions is None:
     187            self.resolutions = self.SUBDATASETS
     188        # allow to ingest data with only particular crss
     189        self.crss = self._set_option(inputopts, 'crss', [])
     190        # only ingest data of the specified levels (an empty list means any level)
     191        self.levels = self._set_option(inputopts, 'levels', [])
    166192   
    167193    def _init_slicer_options(self, covopts):
    168194        sliceropts = covopts['slicer']
    169195        self._set_option(sliceropts, 'type', 'gdal')
    170196        self._set_option(sliceropts, 'pixelIsPoint', False)
    171         if 'axes' not in sliceropts:
    172             self._init_axes_options(sliceropts)
     197        axesopts = self._init_axes_options()
     198        if 'axes' in sliceropts:
     199            for axis in sliceropts['axes']:
     200                if axis not in axesopts:
     201                    raise RecipeValidationException("Invalid axis '" + axis + "', expected one of ansi/E/N.")
     202                for k in sliceropts['axes'][axis]:
     203                    axesopts[axis][k] = sliceropts['axes'][axis][k]
     204        sliceropts['axes'] = axesopts
    173205   
    174     def _init_axes_options(self, sliceropts):
    175         sliceropts['axes'] = {
     206    def _init_axes_options(self):
     207        return {
    176208            'ansi': {
    177209                "min": "datetime(regex_extract('${file:path}', '.*?/S2[^_]+_MSI[^_]+_([\\d]+)T[\\d]+_', 1), 'YYYYMMDD')",
    178210                "gridOrder": 0,
    class Recipe(GeneralCoverageRecipe):  
    230262            gdal_ds.close()
    231263
    232264            level = self._get_level(f.get_filepath())
     265            if len(self.levels) > 0 and level not in self.levels:
     266                # skip file, as it's not in the list of levels provided in the ingredients file
     267                log.debug("Skipping " + level + " data")
     268                continue
    233269            crs_code = ""
    234270
    235271            evaluator_slice = None
    236272
    237             for res in [self.RES_10m, self.RES_20m, self.RES_60m, self.RES_TCI]:
     273            for res in self.resolutions:
    238274                subds_file = self._get_subdataset_file(subdatasets, res)
    239275                crs_code = self._get_crs_code(subds_file.get_filepath(), crs_code)
     276                if len(self.crss) > 0 and crs_code not in self.crss:
     277                    # skip CRS, it's not in the list of CRSs provided in the ingredients file
     278                    log.debug("Skipping data with CRS " + crs_code)
     279                    continue
    240280                cov_id = self._get_coverage_id(self.coverage_id, crs_code, level, res)
    241281                conv = self._get_convertor(convertors, cov_id, crs_code, level, res)
    242282
    class Recipe(GeneralCoverageRecipe):  
    263303   
    264304    def _get_subdatasets(self, gdal_ds, f):
    265305        subdatasets = gdal_ds.get_subdatasets()
    266         if len(subdatasets) != self.SUBDATASETS:
     306        if len(subdatasets) != len(self.SUBDATASETS):
    267307            raise RuntimeException("Cannot handle Sentinel 2 file " + f.get_filepath() +
    268308                                   ": GDAL reported " + str(len(subdatasets)) +
    269                                    " subdatasets, expected " + str(self.SUBDATASETS) + ".")
     309                                   " subdatasets, expected " + str(len(self.SUBDATASETS)) + ".")
    270310        return [name for (name, _) in subdatasets]
    271311
    272312    def _get_subdataset_file(self, subdatasets, res):
  • applications/wcst_import/session.py

    diff --git a/applications/wcst_import/session.py b/applications/wcst_import/session.py
    index 2cc053d8..a26d8d09 100644
    a b class Session:  
    5353        self.files = self.parse_input(inp['paths'] if 'paths' in inp else [])
    5454        self.coverage_id = inp['coverage_id'] if 'coverage_id' in inp else None
    5555        self.recipe = recipe
     56        self.input = inp
    5657        self.wcs_service = config['service_url'] if "service_url" in config else None
    5758        if "tmp_directory" in config:
    5859            self.tmp_directory = config['tmp_directory']
    class Session:  
    258259        """
    259260        return self.recipe
    260261
     262    def get_input(self):
     263        """
     264        Returns the input section of the ingredients
     265        :rtype dict[str,str]
     266        """
     267        return self.input
     268
    261269    def get_coverage_id(self):
    262270        """
    263271        Returns the coverage id for this session
  • doc/main/05_geo-services-guide.rst

    diff --git a/doc/main/05_geo-services-guide.rst b/doc/main/05_geo-services-guide.rst
    index eae156db..0e8f0c31 100644
    a b As of now, these recipes are provided:  
    16041604* :ref:`Irregular timeseries <data-import-recipe-irregular-timeseries>`
    16051605* :ref:`General coverage <data-import-recipe-general-coverage>`
    16061606* :ref:`Import from external WCS <data-import-recipe-wcs_extract>`
     1607* Specialized recipes
     1608
     1609    - :ref:`Sentinel 2 <data-import-recipe-sentinel2>`
    16071610
    16081611For each one of these there is an ingredients example under the
    16091612`ingredients/ <http://rasdaman.org/browser/applications/wcst_import/ingredients>`_
    petascope. Parameters are explained below.  
    25602563    {
    25612564      "config": {
    25622565        "service_url": "http://localhost:8080/rasdaman/ows",
    2563         "tmp_directory": "/tmp/",
    25642566        "default_crs": "http://localhost:8080/def/crs/EPSG/0/4326",
    2565         "mock": false,
    2566         "automated": true,
    2567         "track_files": false
     2567        "automated": true
    25682568      },
    25692569      "input": {
    25702570        "coverage_id": "test_wcs_extract"
    petascope. Parameters are explained below.  
    25882588    }
    25892589
    25902590
     2591.. _data-import-recipe-sentinel2:
     2592
     2593Import Sentinel 2 data
     2594^^^^^^^^^^^^^^^^^^^^^^
     2595
     2596This is a convenience recipe for importing Sentinel 2 data in particular. It
     2597relies on support for Sentinel 2 in `more recent GDAL versions
     2598<https://gdal.org/frmt_sentinel2.html>`__. Importing zipped Sentinel 2 data is
     2599also possible and handled automatically.
     2600
     2601Below is an example:
     2602
     2603.. code-block:: json
     2604
     2605    {
     2606      "config": {
     2607        "service_url": "http://localhost:8080/rasdaman/ows",
     2608        "automated": true
     2609      },
     2610      "input": {
     2611        "coverage_id": "S2_${crsCode}_${resolution}_${level}",
     2612        "paths": [ "S2*.zip" ],
     2613        // Optional filtering settings
     2614        "resolutions": ["10m", "20m", "60m", "TCI"],
     2615        "levels": ["L1C", "L2A"],
     2616        "crss": ["32757"] // remove or leave empty to ingest any CRS
     2617      },
     2618      "recipe": {
     2619        "name": "sentinel2",
     2620        "options": {
     2621          "coverage": {
     2622            "metadata": {
     2623              "type": "xml",
     2624              "global": {
     2625                "Title": "'Sentinel-2 data served by rasdaman'"
     2626              }
     2627            }
     2628          },
     2629          "tiling": "ALIGNED [0:0, 0:1999, 0:1999] TILE SIZE 32000000",
     2630          "wms_import": true
     2631        }
     2632      }
     2633    }
     2634
     2635The recipe extends :ref:`general_coverage <data-import-recipe-general-coverage>`, so
     2636the ``"recipe"`` section has the same structure. However, a lot of information
     2637is automatically filled in by the recipe now, so the ingredients file is much
     2638simpler as the example above shows.
     2639
     2640The other obvious difference is that the ``"coverage_id"`` is templated with
     2641several variables enclosed in ``${`` and ``}`` which are automatically replaced
     2642to generate the actual coverage name during import:
     2643
     2644- ``crsCode`` - the CRS EPSG code of the imported files, e.g. ``32757`` for
     2645  WGS 84 / UTM zone 57S.
     2646
     2647- ``resolution`` - Sentinel 2 products bundle several subdatasets of different
     2648  resolutions: ``10m``, ``20m``, ``60m``, and ``TCI`` (True Color Image, also
     2649  10m as it is derived from the B02, B03, and B04 10m bands).
     2650
     2651- ``level`` - the product level of the imported files, ``L1C`` or ``L2A``.
     2652
     2653If the files collected by ``"paths"`` vary in any of these parameters,
     2654the corresponding variables must appear somewhere in the ``"coverage_id"``.
     2655Otherwise, the ingestion will either fail or result in invalid coverages. E.g.
     2656if all data is level ``L1C`` with CRS ``32757``, but still different
     2657resolutions, the ``"coverage_id"`` could be ``"MyCoverage_${resolution}"``;
     2658the other variables can still be specified though, so
     2659``"MyCoverage_${resolution}_${crsCode}"`` is valid as well.
     2660
     2661In addition, the data to be ingested can be optionally filtered with the
     2662following options in the ``"input"`` section:
     2663
     2664- ``resolutions`` - specify a subset of resolutions to ingest from the data,
     2665  e.g. only the "10m" subdataset; if not specified, data of all supported
     2666  resolutions will be ingested.
     2667
     2668- ``levels`` - specify a subset of levels to ingest, so that files of other
     2669  levels will be fully skipped; if not specified, data of all supported levels
     2670  will be ingested.
     2671
     2672- ``crss`` - specify a subset of CRSs to ingest; if not specified or empty,
     2673  data of any CRS will be ingested.
     2674
     2675
    25912676.. _wms-image-pyramids:
    25922677
    25932678Image pyramids