Source code for nucleon_elastic_ff.data.parsing

"""Routines for parsing information from data files and hdf addresses
"""
from typing import Union

from typing import Dict

import re

from nucleon_elastic_ff.utilities import set_up_logger

LOGGER = set_up_logger("nucleon_elastic_ff")


[docs]def parse_t_info(string: str) -> Dict[str, int]: r"""Extract `t0` and `tsep` info from string. The pattern matches e.g., ``proton_DD_dn_dn_t0_83_tsep_7_sink_mom_px0_py0_pz0``. Matches ``_t0_[0-9]+_tsep_[\-0-9]+_``. If no match is found, tries to identify ``t`` by the source location ``_x[0-9]+y[0-9]+z[0-9]+t[0-9]+`` and sets ``t0`` to ``t``. **Arguments** string: str The string to match **Returns** Dict[str, int]: Dictionary with keys for `t0` and `tsep` """ result = {} match = re.search(r"_t0_(?P<t0>[0-9]+)_tsep_(?P<tsep>[\-0-9]+)_", string) if match: for key, val in match.groupdict().items(): result[key] = int(val) else: match = re.findall(r"x[0-9]+_y[0-9]+_z[0-9]+_t([0-9]+)", string) if match: result["t0"] = int(match[0]) return result
[docs]def parse_file_info( filename: str, convert_numeric: bool = True ) -> Dict[str, Union[int, float, str]]: """Parses the filename and returns dict corresponding to file parameters. **Arguments** filename: str File that starts with `formfac_4D_<...>.h5` where the elipses are not optional. convert_numeric: bool = True Converts float & int strings to floats & ints. If false, leave them as a string. **Raises** ValueError If one key is not specified. """ pattern = ( r"(?P<type>formfac_4D[_a-z]*|spec_4D[_a-z]*)" r"(?:_a(?P<ensemble>[0-9a-zA-Z]+))?" r"(?:_(?P<stream>[a-z]+))?" r"(?:_(?P<cfg>[0-9]+))?" r"(?:_gf(?P<gf>[0-9\.]+))?" r"(?:_w(?P<w>[0-9\.]+))?" r"(?:_n(?P<n>[0-9]+))?" r"(?:_M(?P<M>[0-9\.]+))?" r"(?:_L(?P<L>[0-9]+))?" r"(?:_a(?P<aa>[0-9\.]+))?" r"(?:_mq(?P<mq>[0-9\.]+))?" r"(?:_px(?P<px>[0-9]+)py(?P<py>[0-9]+)pz(?P<pz>[0-9]+))?" r"(?:_dt(?P<dt>[0-9]+))?" r"(?:_Nsnk(?P<Nsnk>[0-9]+))?" r"_" r"(?:x(?P<x>[0-9]+)+y(?P<y>[0-9]+)z(?P<z>[0-9]+)t(?P<t>[0-9]+))|(?P<avg>src_avg)" r"(?:_(?P<stype>[a-zA-Z]+))?" r".h5" ) match = re.search(pattern, filename) if not match: raise ValueError("Was not able to parse file name `%s`." % filename) info = {} LOGGER.debug("Parsing info of `%s`", filename) for key, val in match.groupdict().items(): LOGGER.debug("%s == %s", key, val) if key in ["stype", "type", "stream", "avg", "ensemble"]: info[key] = val elif key in [ "cfg", "n", "L", "px", "py", "pz", "dt", "Nsnk", "x", "y", "z", "t", ]: info[key] = int(val) if convert_numeric and val is not None else val else: info[key] = float(val) if convert_numeric and val is not None else val return info