gramex.config

Utilities to manage Gramex configurations.

These utilities handle objects (see walk, merge, objectpath, recursive_encode and prune_keys below).

These classes manage YAML config files as layered configurations with imports (see PathConfig and ChainConfig below).

Some additional utilities are:

  • gramex.config.app_log is the default logger used by all of Gramex. Use gramex.config.app_log.error(...) to log errors
  • gramex.config.slug.filename(string) converts a string to a valid filename, replacing invalid characters with '-'
  • gramex.config.slug.module(string) converts a string to a valid Python module name, replacing invalid characters with '_'
  • gramex.config.locate loads a module from a string
  • gramex.config.ioloop_running returns True if Gramex is running, else False

walk(node)

Bottom-up recursive walk through a data structure yielding a (key, value, node) tuple for every entry. node[key] == value is true in every entry.

For example

>>> list(walk([{'x': 1}]))
[
    ('x', 1, {'x': 1}),         # leaf:   key, value, node
    (0, {'x': 1}, [{'x': 1}])   # parent: index, value, node
]

Circular linkage can lead to a RuntimeError.

>>> x = {}
>>> x['x'] = x
>>> list(walk(x))
...
RuntimeError: maximum recursion depth exceeded
Source code in gramex\config.py
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
def walk(node):
    '''
    Recursively walk a nested data structure, children before parents.

    Yields a `(key, value, node)` tuple for every entry, such that
    `node[key] == value`. Objects with an `.items()` method are treated as
    mappings and `list` instances are walked by index. Any other object
    yields nothing.

        >>> list(walk([{'x': 1}]))
        [
            ('x', 1, {'x': 1}),         # leaf:   key, value, node
            (0, {'x': 1}, [{'x': 1}])   # parent: index, value, node
        ]

    There is no cycle detection, so a self-referencing structure recurses
    until Python raises a RuntimeError.

        >>> x = {}
        >>> x['x'] = x
        >>> list(walk(x))
        ...
        RuntimeError: maximum recursion depth exceeded
    '''
    if hasattr(node, 'items'):
        # Snapshot the items so that keys may change while we are iterating
        entries = list(node.items())
    elif isinstance(node, list):
        entries = enumerate(node)
    else:
        # Leaf value: nothing to descend into
        return
    for key, value in entries:
        # Emit everything below this entry first (bottom-up), then the entry
        yield from walk(value)
        yield key, value, node

merge(old, new, mode='overwrite', warn=None, _path='')

Update old dict with new dict recursively.

>>> merge({'a': {'x': 1}}, {'a': {'y': 2}})
{'a': {'x': 1, 'y': 2}}

If new is a list, convert into a dict with random keys.

If mode='overwrite', the old dict is overwritten (default). If mode='setdefault', the old dict values are updated only if missing.

warn= is an optional list of key paths. Any conflict on dictionaries matching any of these paths is logged as a warning. For example, warn=['url.*', 'watch.*'] warns if any url: sub-key or watch: sub-key has a conflict.

Source code in gramex\config.py
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def merge(old, new, mode='overwrite', warn=None, _path=''):
    '''
    Recursively update the `old` dict in-place with the `new` dict, and return `old`.

        >>> merge({'a': {'x': 1}}, {'a': {'y': 2}})
        {'a': {'x': 1, 'y': 2}}

    When a key exists in both and both values have an `.items()` method, the
    two values are merged recursively. Otherwise a deep copy of the new value
    is stored, subject to `mode`:

    - `mode='overwrite'` (default): the new value always replaces the old one.
    - `mode='setdefault'`: the new value is stored only if the key is missing.

    `warn=` is an optional list of key paths. Any conflict on dictionaries
    matching any of these paths is logged as a warning. For example,
    `warn=['url.*', 'watch.*']` warns if any url: sub-key or watch: sub-key
    has a conflict.

    `_path` is the dot-separated key path of `old` within the top-level
    structure. It is filled in by the recursive calls.
    '''
    # Dotted prefix for keys at this nesting level (empty at the top level)
    prefix = _path + '.' if _path else ''
    for key in new:
        nested = key in old and hasattr(old[key], 'items') and hasattr(new[key], 'items')
        if not nested:
            # Plain value on either side: copy it over if the mode allows
            if mode == 'overwrite' or key not in old:
                old[key] = deepcopy(new[key])
            continue
        path_key = prefix + str(key)
        # Warn at most once per key, on the first matching pattern
        if warn is not None and any(fnmatch(path_key, pattern) for pattern in warn):
            app_log.warning(f'Duplicate key: {path_key}')
        merge(old=old[key], new=new[key], mode=mode, warn=warn, _path=path_key)
    return old

objectpath(node, keypath, default=None)

Traverse down a dot-separated object path into dict items or object attrs. For example, objectpath(handler, 'request.headers.User-Agent') returns handler.request.headers['User-Agent']. Dictionary access is preferred. Returns default (which is None unless specified) if the path is not found.

Source code in gramex\config.py
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
def objectpath(node, keypath, default=None):
    '''
    Traverse down a dot-separated object path into dict items or object attrs.
    For example, `objectpath(handler, 'request.headers.User-Agent')` returns
    `handler.request.headers['User-Agent']`. Dictionary access is preferred.

    `node` is the object to start from. `keypath` is a string of keys /
    attribute names joined by `.`.

    Returns `default` (which is `None` unless specified) if any step of the
    path is missing or resolves to `None`.
    '''
    for key in keypath.split('.'):
        # Use .get() only on mapping-like nodes. Lists and strings also define
        # __getitem__ but have no .get(), so they fall back to attribute
        # access instead of raising AttributeError.
        if hasattr(node, '__getitem__') and hasattr(node, 'get'):
            node = node.get(key)
        else:
            node = getattr(node, key, None)
        # A stored None is indistinguishable from a missing key
        if node is None:
            return default
    return node

CustomJSONEncoder

Encodes object to JSON, additionally converting datetime into ISO 8601 format

CustomJSONDecoder(args, kwargs)

Decodes JSON string, converting ISO 8601 datetime to datetime

Source code in gramex\config.py
214
215
216
217
def __init__(self, *args, **kwargs):
    '''
    Set up the decoder so that every decoded object passes through `self.convert`.

    Any `object_pairs_hook` supplied by the caller is remembered as
    `self.old_object_pairs_hook` and replaced by `self.convert` before the
    arguments are handed to the parent constructor.
    '''
    self.old_object_pairs_hook = kwargs.get('object_pairs_hook')
    # Always install our own hook, overriding the caller's
    super(CustomJSONDecoder, self).__init__(
        *args, **{**kwargs, 'object_pairs_hook': self.convert}
    )

recursive_encode(data, encoding='utf-8')

Convert Unicode to UTF-8 encoded byte strings in-place.

Source code in gramex\config.py
233
234
235
236
237
238
239
240
241
def recursive_encode(data, encoding='utf-8'):
    '''
    Convert all `str` keys and values in `data` to byte strings, in-place.

    `data` is traversed with `walk()`. `encoding` is the codec passed to
    `str.encode` (UTF-8 by default). Nothing is returned.
    '''
    for key, value, node in walk(data):
        # Encoding a str always produces a new (bytes) object
        target = key.encode(encoding) if isinstance(key, str) else key
        if target is not key:
            # Move the entry from the text key to the encoded key
            node[target] = node.pop(key)
        if isinstance(value, str):
            node[target] = value.encode(encoding)

prune_keys(conf, keys={})

Returns a deep copy of a configuration removing specified keys.

prune_keys(conf, {'comment'}) drops the “comment” key from any dict or sub-dict.

Source code in gramex\config.py
244
245
246
247
248
249
250
251
252
253
def prune_keys(conf, keys=()):
    '''Returns a copy of a configuration with the specified keys removed.

    `prune_keys(conf, {'comment'})` drops the "comment" key from any dict or sub-dict.

    `conf` is the configuration to copy. `keys` is any container that supports
    `in` (e.g. a set of key names). By default, no keys are removed.

    Dicts are rebuilt as `AttrDict`s, and lists / tuples are rebuilt as lists,
    recursively. Any other value is returned as-is, i.e. leaf values are shared
    with `conf`, not copied.
    '''
    if isinstance(conf, dict):
        return AttrDict({k: prune_keys(v, keys) for k, v in conf.items() if k not in keys})
    if isinstance(conf, (list, tuple)):
        return [prune_keys(v, keys) for v in conf]
    return conf

used_kwargs(method, kwargs, ignore_keywords=False)

Splits kwargs into those used by method, and those that are not.

Returns a tuple of (used, rest). used is a dict subset of kwargs with only keys used by method. rest has the remaining kwargs keys.

If the method uses **kwargs (keywords), it uses all keys. To ignore this and return only named arguments, use ignore_keywords=True.

Source code in gramex\config.py
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
def used_kwargs(method, kwargs, ignore_keywords=False):
    '''
    Splits kwargs into those used by method, and those that are not.

    Returns a tuple of (used, rest). *used* is a dict subset of kwargs with only
    keys used by method. *rest* has the remaining kwargs keys.

    A key counts as used if it names a positional-or-keyword parameter or a
    keyword-only parameter of `method`.

    If the method uses `**kwargs` (keywords), it uses all keys. In that case,
    *used* is the `kwargs` object itself (not a copy) and *rest* is empty. To
    ignore this and return only named arguments, use `ignore_keywords=True`.
    '''
    # In Pandas 1.5, DataFrame.to_csv and DataFrame.to_excel are wrapped with @deprecate_kwargs.
    # We dive deeper to detect the actual keywords. __wrapped__ is provided by functools.wraps
    # https://docs.python.org/3/library/functools.html
    while hasattr(method, '__wrapped__'):
        method = method.__wrapped__
    argspec = inspect.getfullargspec(method)
    # If method uses **kwargs, return all kwargs (unless you ignore **kwargs)
    if argspec.varkw and not ignore_keywords:
        return kwargs, {}
    # Named parameters include keyword-only ones, e.g. `c` in `def f(a, *, c=0)`.
    # Build the lookup set once, outside the loop.
    named = set(argspec.args) | set(argspec.kwonlyargs)
    # Split kwargs into 2 dicts -- used and rest
    used, rest = {}, {}
    for key, val in kwargs.items():
        target = used if key in named else rest
        target[key] = val
    return used, rest

random_string(size, chars=_valid_key_chars)

Return random string of length size using chars (which defaults to alphanumeric)

Source code in gramex\config.py
287
288
289
290
def random_string(size, chars=_valid_key_chars):
    '''
    Return a random string with `size` characters, each picked from `chars`.

    `chars` defaults to the module-level `_valid_key_chars`.
    '''
    # B311:random random() is safe since it's for non-cryptographic use
    picked = [choice(chars) for _ in range(size)]  # nosec B311
    return ''.join(picked)

PathConfig(path, warn=None)

An AttrDict that is loaded from a path as a YAML file. For example, conf = PathConfig(path) loads the YAML file at path as an AttrDict. +conf reloads the path if required.

warn= is an optional list of key paths. Any conflict on dictionaries matching any of these paths is logged as a warning. For example, warn=['url.*', 'watch.*'] warns if any url: sub-key or watch: sub-key has a conflict.

Like http://configure.readthedocs.org/ but supports imports not inheritance. This lets us import YAML files in the middle of a YAML structure.

key:
    import:
        conf1: file1.yaml       # Import file1.yaml here
        conf2: file2.yaml       # Import file2.yaml here

Each PathConfig object has an __info__ attribute with the following keys:

__info__.path: The path that this instance syncs with, stored as a pathlib.Path.
__info__.warn: The keys to warn about in case of an import merge conflict.
__info__.imports: A list of imported files, each stored as an AttrDict with 2 attributes:

path
    The path that was imported, stored as a `pathlib.Path`
stat
    The `os.stat()` information about this file (or `None` if the
    file is missing.)
Source code in gramex\config.py
331
332
333
334
335
336
def __init__(self, path, warn=None):
    '''
    Create a config bound to the YAML file at `path` and load it immediately.

    `warn` is the list of key paths to warn about on merge conflicts. If it
    is `None`, `self.duplicate_warn` is used instead.
    '''
    super(PathConfig, self).__init__()
    self.__info__ = AttrDict(
        path=Path(path),
        imports=[],
        warn=self.duplicate_warn if warn is None else warn,
    )
    # No imports are recorded yet, so this performs the initial load
    self.__pos__()

__pos__()

+config reloads this config (if it has a path)

Source code in gramex\config.py
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
def __pos__(self):
    '''
    +config reloads this config (if it has a path).

    The config is reloaded from `self.__info__.path` if nothing has been
    imported yet, or if any previously imported file has since been deleted,
    created, or changed (newer modification time or different size).

    Returns `self`, so that `+config` can be used as an expression.
    '''
    path = self.__info__.path

    # We must reload the layer if nothing has been imported...
    reload = not self.__info__.imports
    # ... or if an imported file is deleted / created / updated
    for imp in self.__info__.imports:
        if not imp.path.exists():
            # If the path existed but has now been deleted, log it
            if imp.stat is not None:
                reload = True
                app_log.debug(f'Config deleted: {imp.path}')
                break
            # It was missing earlier and is still missing. Nothing changed
            continue
        if imp.stat is None:
            # The file was missing when last loaded but exists now. There is
            # no earlier stat to compare against, so treat it as updated.
            changed = True
        else:
            # stat() the file once and compare against the recorded stat
            current = imp.path.stat()
            changed = (
                current.st_mtime > imp.stat.st_mtime or current.st_size != imp.stat.st_size
            )
        if changed:
            reload = True
            app_log.info(f'Updated config: {imp.path}')
            break
    if reload:
        self.clear()
        self.update(_yaml_open(path))
        self.__info__.imports = load_imports(self, source=path, warn=self.__info__.warn)
    return self

ChainConfig

An AttrDict that manages multiple configurations as layers.

>>> config = ChainConfig([
...     ('base', PathConfig('gramex.yaml')),
...     ('app1', PathConfig('app.yaml')),
...     ('app2', AttrDict())
... ])

Any dict-compatible values are allowed. +config returns the merged values.

__pos__()

+config returns layers merged in order, removing null keys

Source code in gramex\config.py
379
380
381
382
383
384
385
386
387
388
389
390
391
392
def __pos__(self):
    '''
    +config returns layers merged in order, removing null keys.

    Each layer that defines `__pos__` is refreshed first. Layers are then
    merged into a new `AttrDict` in iteration order, with later layers
    overwriting earlier ones. Finally, every entry whose value is `None` is
    removed from the merged result, which is returned.
    '''
    conf = AttrDict()
    for _name, config in self.items():
        if hasattr(config, '__pos__'):
            config.__pos__()
        merge(old=conf, new=config, mode='overwrite')

    # Remove keys where the value is None. Process the walk in reverse so that
    # within any list, higher indices are deleted before lower ones. Deleting
    # in forward order shifts the remaining items, which leads to an IndexError
    # or to the wrong item being removed when a list has multiple entries.
    for key, value, node in reversed(list(walk(conf))):
        if value is None:
            del node[key]

    return conf

setup_variables()

Initialise variables

Source code in gramex\config.py
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
def setup_variables():
    '''
    Build and return the initial variables.

    The result is a `DefaultAttrDict(str)` holding all environment variables,
    overridden by `secrets`, plus GRAMEXPATH, GRAMEXAPPS and GRAMEXHOST.
    GRAMEXDATA is set to an OS-specific location unless already defined.
    '''
    variables = DefaultAttrDict(str)
    # Environment variables first. Secrets take priority over them
    for source in (os.environ, secrets):
        variables.update(source)
    variables['GRAMEXPATH'] = _gramex_path  # Gramex root directory
    variables['GRAMEXAPPS'] = os.path.join(_gramex_path, 'apps')  # Gramex apps directory
    variables['GRAMEXHOST'] = socket.gethostname()  # hostname
    # Respect a GRAMEXDATA that was set via the environment or secrets
    if 'GRAMEXDATA' in variables:
        return variables

    # Otherwise, GRAMEXDATA varies based on OS
    known_os = True
    if sys.platform.startswith('linux') or sys.platform == 'cygwin':
        data_dir = os.path.expanduser('~/.config/gramexdata')
    elif sys.platform == 'win32':
        data_dir = os.path.join(variables['LOCALAPPDATA'], 'Gramex Data')
    elif sys.platform == 'darwin':
        data_dir = os.path.expanduser('~/Library/Application Support/Gramex Data')
    else:
        # Unknown OS: fall back to the current directory
        data_dir = os.path.abspath('.')
        known_os = False
    variables['GRAMEXDATA'] = data_dir
    if not known_os:
        app_log.warning(f'$GRAMEXDATA set to {variables["GRAMEXDATA"]} for OS {sys.platform}')
    return variables

ConfigYAMLLoader(args, kwargs)

A YAML loader that loads a YAML file into an ordered AttrDict.

>>> attrdict = yaml.load(yaml_string, Loader=ConfigYAMLLoader)

If there are duplicate keys, this raises an error.

Source code in gramex\config.py
445
446
447
448
def __init__(self, *args, **kwargs):
    '''
    Initialise the loader, then register `_from_yaml` as the constructor for
    YAML mappings (`map`) and ordered mappings (`omap`).
    '''
    super(ConfigYAMLLoader, self).__init__(*args, **kwargs)
    for tag in ('tag:yaml.org,2002:map', 'tag:yaml.org,2002:omap'):
        self.add_constructor(tag, _from_yaml)

load_imports(config, source, warn=None)

Post-process a config for imports.

config is the data to process. source is the path where it was loaded from.

If config has an import: key, treat all values below that as YAML files (specified relative to source) and import them in sequence.

Return a list of imported paths as :func:_pathstat objects. (This includes source.)

For example, if the source is base.yaml (which has the below configuration) and is loaded into config.

app:
    port: 20
    start: true
path: /
import: update*.yaml    # Can be any glob, e.g. */gramex.yaml

… and update.yaml looks like this.

app:
    port: 30
    new: yes

… then after this function is called, config looks like this.

app:
    port: 20        # From base.yaml. NOT updated by update.yaml
    start: true     # From base.yaml
    new: yes        # From update.yaml
path: /             # From base.yaml

The import: keys are deleted. The return value contains :func:_pathstat values for base.yaml and update.yaml in that order.

Multiple import: values can be specified as a dictionary.

import:
    first-app: app1/*.yaml
    next-app: app2/*.yaml

To import sub-keys as namespaces

import:
    app: {path: */gramex.yaml, namespace: 'url'}

This prefixes all keys under url:. Here are more examples.

namespace: True             # Add namespace to all top-level keys
namespace: url              # Add namespace to url.*
namespace: log.loggers      # Add namespace to log.loggers.*
namespace: [True, url]      # Add namespace to top level keys and url.*

By default, the prefix is the relative path of the imported YAML file (relative to the importer).

warn= is an optional list of key paths. Any conflict on dictionaries matching any of these paths is logged as a warning. For example, warn=['url.*', 'watch.*'] warns if any url: sub-key or watch: sub-key has a conflict.

Source code in gramex\config.py
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
def load_imports(config, source, warn=None):
    '''
    Post-process a config for imports. `config` is modified in-place.

    `config` is the data to process. `source` is the path where it was
    loaded from.

    If `config` has an `import:` key, treat all values below that as YAML
    files (specified relative to `source`) and import them in sequence.

    Return a list of imported paths as :func:_pathstat objects. (This includes
    `source`.)

    For example, if the `source` is  `base.yaml` (which has the below
    configuration) and is loaded into `config`.

        app:
            port: 20
            start: true
        path: /
        import: update*.yaml    # Can be any glob, e.g. */gramex.yaml

    ... and `update.yaml` looks like this.

        app:
            port: 30
            new: yes

    ... then after this function is called, `config` looks like this.

        app:
            port: 20        # From base.yaml. NOT updated by update.yaml
            start: true     # From base.yaml
            new: yes        # From update.yaml
        path: /             # From base.yaml

    The `import:` keys are deleted. The return value contains :func:_pathstat
    values for `base.yaml` and `update.yaml` in that order.

    Multiple `import:` values can be specified as a list or as a dictionary.

        import:
            first-app: app1/*.yaml
            next-app: app2/*.yaml

    Each import can also be a dict with a `path:` key (a path or list of
    paths). Paths containing `*` are expanded as globs relative to the
    directory of `source`, in sorted order. Keys other than `path:` and
    `namespace:` are passed as keyword arguments to `_yaml_open`.

    To import sub-keys as namespaces

        import:
            app: {path: */gramex.yaml, namespace: 'url'}

    This prefixes all keys under `url:`. Here are more examples.

        namespace: True             # Add namespace to all top-level keys
        namespace: url              # Add namespace to url.*
        namespace: log.loggers      # Add namespace to log.loggers.*
        namespace: [True, url]      # Add namespace to top level keys and url.*

    The prefix passed to `_add_ns` is `<import name>:<path>`, where the path
    is the imported YAML file's path (relative to the importer for globs).

    Any key beginning with `import.merge` must have a dict value (or be
    empty). Its entries are copied into the dict that holds the key,
    replacing existing entries, and the key itself is deleted.

    `warn=` is an optional list of key paths. Any conflict on dictionaries
    matching any of these paths is logged as a warning. For example,
    `warn=['url.*', 'watch.*']` warns if any url: sub-key or watch: sub-key
    has a conflict.

    Raises `ValueError` if an `import:` value is not a string / list / dict,
    if an import dict has no `path:`, or if a non-empty `import.merge` value
    is not a dict.
    '''
    # The source itself is always the first "imported" path
    imported_paths = [_pathstat(source)]
    # Relative import paths and globs are resolved against the source's directory
    root = source.absolute().parent
    # Materialise the walk up-front, since nodes are modified inside the loop
    for key, value, node in list(walk(config)):
        if isinstance(key, str) and key.startswith('import.merge'):
            # Strip the top level key(s) from import.merge values
            if isinstance(value, dict):
                for name, conf in value.items():
                    node[name] = conf
            elif value:
                raise ValueError(f'import.merge: must be dict, not {value!r} at {source}')
            # Delete the import key
            del node[key]
        elif key == 'import':
            # Convert "import: path" to "import: {apps: path}"
            if isinstance(value, str):
                value = {'apps': value}
            # Allow "import: [path, path]" to "import: {app0: path, app1: path}"
            elif isinstance(value, list):
                value = OrderedDict(((f'app{i}', conf) for i, conf in enumerate(value)))
            # By now, import: should be a dict
            elif not isinstance(value, dict):
                raise ValueError(f'import: must be string/list/dict, not {value!r} at {source}')
            # If already a dict with a single import via 'path', convert to dict of apps
            if 'path' in value:
                value = {'app': value}
            for name, conf in value.items():
                # Normalise "name: path" into "name: {path: path}"
                if not isinstance(conf, dict):
                    conf = AttrDict(path=conf)
                if 'path' not in conf:
                    raise ValueError(f'import: has no conf at {source}')
                # path: may be a single path or a list of paths
                paths = conf.pop('path')
                paths = paths if isinstance(paths, list) else [paths]
                globbed_paths = []
                for path in paths:
                    # Only paths with a * are treated as globs (and sorted). Others are used as-is
                    globbed_paths += sorted(root.glob(path)) if '*' in path else [Path(path)]
                ns = conf.pop('namespace', None)
                for path in globbed_paths:
                    abspath = root.joinpath(path)
                    # Remaining keys in conf (after path / namespace) are passed to _yaml_open
                    new_conf = _yaml_open(abspath, **conf)
                    if ns is not None:
                        prefix = Path(path).as_posix()
                        new_conf = _add_ns(new_conf, ns, name + ':' + prefix)
                    # Resolve nested imports inside the imported file first.
                    # NOTE(review): warn= is not passed to this recursive call -- confirm if intended
                    imported_paths += load_imports(new_conf, source=abspath)
                    # setdefault: values already in the importing config take priority
                    merge(old=node, new=new_conf, mode='setdefault', warn=warn)
            # Delete the import key
            del node[key]
    return imported_paths

locate(path, modules=[], forceload=0)

Locate an object by name or dotted path.

For example, locate('str') returns the str built-in. locate('gramex.handlers.FileHandler') returns the class gramex.handlers.FileHandler.

modules is a list of modules to search for the path in first. So locate('FileHandler', modules=[gramex.handlers]) will return gramex.handlers.FileHandler.

If importing raises an Exception, log it and return None.

Source code in gramex\config.py
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
def locate(path, modules=(), forceload=0):
    '''
    Locate an object by name or dotted path.

    For example, `locate('str')` returns the `str` built-in.
    `locate('gramex.handlers.FileHandler')` returns the class
    `gramex.handlers.FileHandler`.

    `modules` is a sequence of modules to search for the path in first. Each
    entry is resolved via `_locate`, and the first one that has an attribute
    named `path` wins. So `locate('FileHandler', modules=['gramex.handlers'])`
    will return `gramex.handlers.FileHandler`.
    NOTE(review): entries are presumably dotted module names (strings), since
    they are passed to `_locate` -- confirm against callers.

    `forceload` is passed as-is to `_locate`.

    If importing raises an Exception, log it and return None.
    '''
    try:
        for module_name in modules:
            module = _locate(module_name, forceload)
            if hasattr(module, path):
                return getattr(module, path)
        # Not found in any of the modules. Treat path as a full dotted path
        return _locate(path, forceload)
    except ErrorDuringImport:
        app_log.exception(f'Exception when importing {path}')
        return None

TimedRotatingCSVHandler(args, kwargs)

Same as logging.handlers.TimedRotatingFileHandler, but writes to a CSV. The constructor accepts an additional keys list as input that has column keys. When .emit() is called, it expects an object with the same keys as keys.

Source code in gramex\config.py
823
824
825
def __init__(self, *args, **kwargs):
    '''
    Store the required `keys=` keyword argument as `self.keys`, and pass all
    other arguments to the parent constructor.

    Raises `KeyError` if `keys=` is not specified.
    '''
    # keys= is consumed here and is not passed on to the parent class
    column_keys = kwargs.pop('keys')
    self.keys = column_keys
    super(TimedRotatingCSVHandler, self).__init__(*args, **kwargs)

ioloop_running(loop)

Returns whether the Tornado ioloop is running or not

Source code in gramex\config.py
859
860
861
862
def ioloop_running(loop):
    '''
    Returns whether the Tornado ioloop is running or not.

    `loop` must have an `asyncio_loop` attribute. The result is whatever that
    loop's `is_running()` returns.
    '''
    # TODO: Pressing Ctrl+C may cause this to raise an exception. Explore how to handle that
    asyncio_loop = loop.asyncio_loop
    return asyncio_loop.is_running()

setup_secrets(path, max_age_days=1000000, clear=True)

Load <path> (which must be Path) as a YAML file. Update it into gramex.config.variables.

If there’s a SECRETS_URL: and SECRETS_KEY: key, the text from SECRETS_URL: is decrypted using secrets_key.

If there’s a SECRETS_IMPORT: string, list or dict, the values are treated as file patterns pointing to other secrets files to be imported.

Source code in gramex\config.py
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
def setup_secrets(path, max_age_days=1000000, clear=True):
    '''
    Load `<path>` (which must be Path) as a YAML file. Update it into the module-level `secrets`.

    Nothing happens if `path` is not a file, or if the file is empty.

    If there's a `SECRETS_URL:` and `SECRETS_KEY:` key, the text from `SECRETS_URL:` is
    decrypted using `secrets_key`. `max_age_days` is passed to `decode_signed_value`.

    If there's a `SECRETS_IMPORT:` string, list or dict, the values are treated as file patterns
    pointing to other secrets file to be imported. Patterns are relative to the directory
    containing `path`. Values in `path` itself take priority over imported values.

    `clear=True` empties the existing `secrets` before loading. Recursive imports use
    `clear=False`.

    Raises `ValueError` if the YAML file is non-empty but is not a dict.
    '''
    if not path.is_file():
        return

    with path.open(encoding='utf-8') as handle:
        result = yaml.safe_load(handle)
    # Ignore empty .secrets.yaml
    if not result:
        return
    # If it's non-empty, it must be a dict
    if not isinstance(result, dict):
        raise ValueError(f'{path}: must be a YAML file with a single dict')
    # Clear secrets if we are re-initializing. Not if we're importing recursively.
    if clear:
        secrets.clear()
    # If SECRETS_URL: and SECRETS_KEY: are set, fetch secrets from URL and decrypted with the key.
    # This allows changing secrets remotely without access to the server.
    secrets_url = result.pop('SECRETS_URL', None)
    secrets_key = result.pop('SECRETS_KEY', None)
    if secrets_url and secrets_key:
        from urllib.request import urlopen
        from tornado.web import decode_signed_value

        app_log.info(f'Fetching remote secrets from {secrets_url}')
        # Load string from the URL -- but ignore comments. file:// URLs are fine too.
        # Use a context manager so that the response is closed after reading.
        # B310:urllib_urlopen secrets can be local files or URLs
        with urlopen(secrets_url) as response:  # nosec B310
            value = yaml.safe_load(response)
        value = decode_signed_value(secrets_key, '', value, max_age_days=max_age_days)
        result.update(loads(value.decode('utf-8')))
    # If SECRETS_IMPORT: is set, fetch secrets from those file(s) as well.
    # SECRETS_IMPORT: can be a file pattern, or a list/dict of file patterns
    secrets_import = result.pop('SECRETS_IMPORT', None)
    if secrets_import:
        # Create a list of file patterns to import from
        if isinstance(secrets_import, dict):
            imports = list(secrets_import.values())
        elif isinstance(secrets_import, (list, tuple)):
            imports = secrets_import
        else:
            imports = [secrets_import]
        for pattern in imports:
            for import_path in path.parent.glob(pattern):
                setup_secrets(import_path, max_age_days, clear=False)
    # Apply this file's values last, so that they override imported values
    secrets.update(result)