"""Routines for parsing information from data files and hdf addresses
"""
from typing import Union
from typing import Dict
import re
from nucleon_elastic_ff.utilities import set_up_logger
LOGGER = set_up_logger("nucleon_elastic_ff")
def parse_t_info(string: str) -> Dict[str, int]:
    r"""Extract `t0` and `tsep` info from string.

    The pattern matches e.g., ``proton_DD_dn_dn_t0_83_tsep_7_sink_mom_px0_py0_pz0``.

    Matches ``_t0_[0-9]+_tsep_-?[0-9]+_`` (``tsep`` may carry a leading minus
    sign).
    If no match is found, tries to identify ``t`` by the source location
    ``x[0-9]+_y[0-9]+_z[0-9]+_t[0-9]+`` and sets ``t0`` to ``t``.

    **Arguments**
        string: str
            The string to match

    **Returns**
        Dict[str, int]:
            Dictionary with keys `t0` and `tsep` if the first pattern matches,
            with key `t0` only if just the source location is found, and an
            empty dictionary if neither pattern is found.
    """
    # ``-?[0-9]+`` (instead of a ``[\-0-9]+`` character class) guarantees that
    # whatever is captured can be converted by ``int``; values such as ``7-3``
    # no longer match here and are left to the fallback below.
    match = re.search(r"_t0_(?P<t0>[0-9]+)_tsep_(?P<tsep>-?[0-9]+)_", string)
    if match:
        return {key: int(val) for key, val in match.groupdict().items()}

    # Fallback: take ``t0`` from the first underscore-separated source location.
    match = re.search(r"x[0-9]+_y[0-9]+_z[0-9]+_t([0-9]+)", string)
    if match:
        return {"t0": int(match.group(1))}

    return {}
def parse_file_info(
    filename: str, convert_numeric: bool = True
) -> Dict[str, Union[int, float, str]]:
    """Parses the filename and returns dict corresponding to file parameters.

    The file name is searched for a name of the form
    ``<type>[_a<ensemble>][_<stream>][_<cfg>][_gf..][_w..][_n..][_M..][_L..]``
    ``[_a..][_mq..][_px..py..pz..][_dt..][_Nsnk..]_<source>[_<stype>].h5``
    where ``<type>`` starts with ``formfac_4D`` or ``spec_4D`` and ``<source>``
    is either ``x..y..z..t..`` or ``src_avg``.

    **Arguments**
        filename: str
            File that starts with `formfac_4D_<...>.h5` where the ellipses are
            not optional.

        convert_numeric: bool = True
            Converts float & int strings to floats & ints.
            If false, leave them as a string.

    **Returns**
        Dict[str, Union[int, float, str]]:
            One entry per field of the pattern: ``type``, ``ensemble``,
            ``stream``, ``cfg``, ``gf``, ``w``, ``n``, ``M``, ``L``, ``aa``,
            ``mq``, ``px``, ``py``, ``pz``, ``dt``, ``Nsnk``, ``x``, ``y``,
            ``z``, ``t``, ``avg`` and ``stype``.
            Fields which are not present in the file name are ``None``.

    **Raises**
        ValueError
            If the file name does not match the pattern, or if a matched
            numeric field cannot be converted (e.g. ``1.0.5``).
    """
    pattern = (
        r"(?P<type>formfac_4D[_a-z]*|spec_4D[_a-z]*)"
        r"(?:_a(?P<ensemble>[0-9a-zA-Z]+))?"
        r"(?:_(?P<stream>[a-z]+))?"
        r"(?:_(?P<cfg>[0-9]+))?"
        r"(?:_gf(?P<gf>[0-9\.]+))?"
        r"(?:_w(?P<w>[0-9\.]+))?"
        r"(?:_n(?P<n>[0-9]+))?"
        r"(?:_M(?P<M>[0-9\.]+))?"
        r"(?:_L(?P<L>[0-9]+))?"
        r"(?:_a(?P<aa>[0-9\.]+))?"
        r"(?:_mq(?P<mq>[0-9\.]+))?"
        r"(?:_px(?P<px>[0-9]+)py(?P<py>[0-9]+)pz(?P<pz>[0-9]+))?"
        r"(?:_dt(?P<dt>[0-9]+))?"
        r"(?:_Nsnk(?P<Nsnk>[0-9]+))?"
        r"_"
        # The source is either an explicit location or the source average.
        # The alternation must be wrapped in a group: a bare top-level ``|``
        # would split the *whole* pattern in two, so that ``src_avg`` files
        # lose all fields above and ``x..y..z..t..`` files never reach the
        # ``stype``/``.h5`` part below.
        r"(?:"
        r"x(?P<x>[0-9]+)y(?P<y>[0-9]+)z(?P<z>[0-9]+)t(?P<t>[0-9]+)"
        r"|(?P<avg>src_avg)"
        r")"
        r"(?:_(?P<stype>[a-zA-Z]+))?"
        # Escaped dot: only a literal ``.h5`` extension is accepted.
        r"\.h5"
    )

    match = re.search(pattern, filename)
    if not match:
        raise ValueError("Was not able to parse file name `%s`." % filename)

    string_keys = ("stype", "type", "stream", "avg", "ensemble")
    int_keys = ("cfg", "n", "L", "px", "py", "pz", "dt", "Nsnk", "x", "y", "z", "t")

    info = {}
    LOGGER.debug("Parsing info of `%s`", filename)
    for key, val in match.groupdict().items():
        LOGGER.debug("%s == %s", key, val)
        if val is None or not convert_numeric or key in string_keys:
            # Missing fields stay ``None``; strings are never converted.
            info[key] = val
        elif key in int_keys:
            info[key] = int(val)
        else:
            # Remaining fields (gf, w, M, aa, mq) are floating point numbers.
            info[key] = float(val)

    return info