# !/usr/bin/env python
#
# classes.py
"""
Main classes for the worklist parser.
"""
#
# Copyright © 2020-2021 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# stdlib
from pprint import pformat
from typing import Any, Dict, List, Optional, Sequence, Union
from uuid import UUID
# 3rd party
import attr
import lxml.etree # type: ignore
import pandas # type: ignore
from attr_utils.docstrings import add_attrs_doc
from attr_utils.serialise import serde
from domdf_python_tools.bases import Dictable
from domdf_python_tools.doctools import prettify_docstrings
# this package
from mh_utils.utils import element_to_bool, strip_string
from mh_utils.worklist_parser.columns import Column, columns
from mh_utils.worklist_parser.enums import AttributeType
from mh_utils.worklist_parser.parser import parse_params, parse_sample_info
from mh_utils.xml import XMLFileMixin
# Public names exported by ``from <this module> import *``.
__all__ = ["JobData", "Worklist", "Checksum", "Macro", "Attribute"]
# Overwrite the module name recorded on ``pandas.DataFrame``.
# NOTE(review): presumably so documentation tooling renders/links the type as
# ``pandas.DataFrame`` rather than its internal module path -- confirm.
pandas.DataFrame.__module__ = "pandas"
class JobData(Dictable):
    """
    A single entry (job) in the worklist.

    :param id: The ID of the job. A value that is not already a :class:`uuid.UUID`
        is converted to a string and parsed as one.
    :param job_type: The type of job. TODO: enum of values
    :param run_status: The status of the analysis. TODO: enum of values
    :param sample_info: Optional ``key: value`` mapping of information about the sample.
        A falsy value (``None`` or an empty mapping) is stored as a new empty :class:`dict`.
    """

    __slots__ = ["id", "job_type", "run_status", "sample_info"]

    # dtypes
    # 8: Str
    # Inj Vol, Dilution and Equilib Time (min) 5

    def __init__(
        self,
        id: Union[str, UUID],  # noqa: A002  # pylint: disable=redefined-builtin
        job_type: int,
        run_status: int,
        sample_info: Optional[dict] = None,
    ):
        super().__init__()

        # Keep UUID instances untouched; coerce everything else via its string form.
        self.id = id if isinstance(id, UUID) else UUID(str(id))
        self.job_type = int(job_type)
        self.run_status = int(run_status)
        self.sample_info = sample_info or {}

    @classmethod
    def from_xml(
        cls,
        element: lxml.objectify.ObjectifiedElement,
        user_columns: Optional[Dict[str, Column]] = None,
    ) -> "JobData":
        """
        Build a :class:`~.JobData` from an XML element.

        The ``ID``, ``JobType``, ``RunStatus`` and ``SampleInfo`` children of ``element`` are read;
        ``SampleInfo`` is handed to ``parse_sample_info`` together with ``user_columns``.

        :param element: The XML element to parse the data from
        :param user_columns: Optional mapping of user column labels to
            :class:`~mh_utils.worklist_parser.columns.Column` objects.
        """

        # The mapping is filled in the same order the constructor arguments were originally read.
        fields = {
            "id": element.ID,
            "job_type": element.JobType,
            "run_status": element.RunStatus,
            "sample_info": parse_sample_info(element.SampleInfo, user_columns),
        }
        return cls(**fields)

    @property
    def __dict__(self):
        # Mirror the slots as a plain mapping; the UUID is exposed as its string form.
        return {
            name: (str(self.id) if name == "id" else getattr(self, name))
            for name in self.__slots__
        }

    def __repr__(self) -> str:
        # ``sample_info`` is left out of the repr.
        shown = [f"{key}={val!r}" for key, val in self if key != "sample_info"]
        joined = ", ".join(shown)
        return f"{self.__class__.__name__}({joined})"
@prettify_docstrings
class Worklist(XMLFileMixin, Dictable):
    """
    Class that represents an Agilent MassHunter worklist.

    :param version: WorklistInfo version number
    :param locked_run_mode: Flag to indicate whether the data was acquired in locked mode. Yes = -1. No = 0.
    :param instrument_name: The name of the instrument.
    :param params: Mapping of parameter names to values. TODO: Check
    :param user_columns: Mapping of user columns. When built by :meth:`from_xml` this maps
        column names to :class:`~mh_utils.worklist_parser.columns.Column` objects.
    :param jobs: The jobs (entries) in the worklist. Stored as a :class:`list`.
    :param checksum: The checksum of the worklist file. The format is unknown.
    """

    __slots__ = ["version", "user_columns", "jobs", "checksum", "locked_run_mode", "instrument_name", "params"]

    def __init__(
        self,
        version: float,
        locked_run_mode: bool,
        instrument_name: str,
        params: dict,
        user_columns: dict,
        jobs: Sequence[JobData],
        checksum: "Checksum",
    ):
        super().__init__()

        self.version = float(version)
        self.locked_run_mode = bool(locked_run_mode)
        self.instrument_name = str(instrument_name)
        self.params = params
        self.user_columns = user_columns
        self.jobs = list(jobs)
        self.checksum = checksum

    @property
    def __dict__(self):
        # Mirror the slots as a plain mapping.
        return {key: getattr(self, key) for key in self.__slots__}

    @classmethod
    def from_xml(cls, element: lxml.objectify.ObjectifiedElement) -> "Worklist":
        """
        Construct a :class:`~.Worklist` object from an XML element.

        :param element: The XML element to parse the data from. Its ``Version``, ``Checksum``
            and ``WorklistInfo`` children are read.

        :raises ValueError: If ``LockedRunMode`` is neither ``-1`` nor ``0``.
        """

        version = float(element.Version)
        checksum = Checksum.from_xml(element.Checksum)

        worklist_info = element.WorklistInfo

        # -1 means "locked", 0 means "not locked"; anything else is rejected.
        if worklist_info.LockedRunMode == -1:
            locked_run_mode = True
        elif worklist_info.LockedRunMode == 0:
            locked_run_mode = False
        else:
            raise ValueError("Unknown value for 'LockedRunMode'")

        instrument_name = str(worklist_info.Instrument)
        params = parse_params(worklist_info.Params)

        # Every attribute is parsed, but only those that are not system defined become user columns.
        user_columns: Dict[str, Column] = {}
        for attribute_element in worklist_info.AttributeInformation.iterchildren("Attributes"):
            attribute = Attribute.from_xml(attribute_element)

            if attribute.attribute_type != AttributeType.SystemDefined:
                column = Column.from_attribute(attribute)
                user_columns[column.name] = column

        # Jobs are parsed after the attributes because they need the complete ``user_columns`` mapping.
        jobs_list: List[JobData] = [
            JobData.from_xml(job, user_columns)
            for job in worklist_info.JobDataList.iterchildren("JobData")
        ]

        return cls(
            version=version,
            locked_run_mode=locked_run_mode,
            instrument_name=instrument_name,
            params=params,
            user_columns=user_columns,
            jobs=jobs_list,
            checksum=checksum,
        )

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({pformat(dict(self))})"

    def as_dataframe(self) -> pandas.DataFrame:
        """
        Returns the :class:`~.Worklist` as a :class:`pandas.DataFrame`.

        There is one row per job. The columns are the labels from ``columns``
        followed by the keys of ``user_columns``; every job's ``sample_info``
        must contain all of them, otherwise a :exc:`KeyError` is raised.

        :rtype:

        .. clearpage::
        """

        headers = list(columns) + list(self.user_columns)

        data = [[job.sample_info[header_label] for header_label in headers] for job in self.jobs]

        # TODO: Sort columns by "reorder_id"
        return pandas.DataFrame(data, columns=headers)
@serde
@add_attrs_doc
@attr.s(slots=True)
class Checksum:
    """
    The checksum recorded for a worklist.

    The format of the checksum is unknown.
    """

    # Each field is coerced by its converter when the instance is created.
    SchemaVersion: int = attr.ib(converter=int)
    ALGO_VERSION: int = attr.ib(converter=int)
    HASHCODE: str = attr.ib(converter=str)

    @classmethod
    def from_xml(cls, element: lxml.objectify.ObjectifiedElement) -> "Checksum":
        """
        Build a :class:`~.Checksum` from an XML element.

        ``SchemaVersion`` and ``ALGO_VERSION`` are read from the attributes of ``element`` itself,
        and ``HASHCODE`` from the attributes of its ``MAIN`` child.

        :param element: The XML element to parse the data from.
        """

        root_attributes = element.attrib

        return cls(
            SchemaVersion=root_attributes["SchemaVersion"],
            ALGO_VERSION=root_attributes["ALGO_VERSION"],
            HASHCODE=element.MAIN.attrib["HASHCODE"],
        )
@serde
@add_attrs_doc
@attr.s(slots=True, repr=False)
class Macro:
    """
    A macro entry in a worklist.

    :param output_parameter: .
    """

    project_name: str = attr.ib(converter=strip_string)
    procedure_name: str = attr.ib(converter=strip_string)
    input_parameter: str = attr.ib(converter=strip_string)
    output_data_type: int = attr.ib(converter=int)
    output_parameter: str = attr.ib(converter=strip_string)
    """
    .. clearpage::
    """

    display_string: str = attr.ib(converter=strip_string)

    # TODO: enum for output_data_type

    @classmethod
    def from_xml(cls, element: lxml.objectify.ObjectifiedElement) -> "Macro":
        """
        Build a :class:`~.Macro` from an XML element.

        :param element: The XML element to parse the data from. Its ``ProjectName``,
            ``ProcedureName``, ``InputParameter``, ``OutputDataType``, ``OutputParameter``
            and ``DisplayString`` children are read.
        """

        fields = {
            "project_name": element.ProjectName,
            "procedure_name": element.ProcedureName,
            "input_parameter": element.InputParameter,
            "output_data_type": element.OutputDataType,
            "output_parameter": element.OutputParameter,
            "display_string": element.DisplayString,
        }
        return cls(**fields)

    @property
    def undefined(self) -> bool:
        """
        Returns whether the macro is undefined.

        A macro counts as undefined when every string field is empty
        and ``output_data_type`` is ``0``.
        """

        current = (
            self.project_name,
            self.procedure_name,
            self.input_parameter,
            self.output_data_type,
            self.output_parameter,
            self.display_string,
        )
        return current == ('', '', '', 0, '', '')

    def __repr__(self) -> str:
        class_name = self.__class__.__name__

        if self.undefined:
            return f"{class_name}(Undefined)"

        slots = self.__slots__  # type: ignore[attr-defined]  # attrs adds __slots__ but mypy doesn't know
        # ``__weakref__`` is a slot but not a field, so it is skipped.
        field_names = [name for name in slots if name != "__weakref__"]
        values = ", ".join(f"{name}={getattr(self, name)!r}" for name in field_names)
        return f"{class_name}({values})"
@serde
@add_attrs_doc
@attr.s(slots=True)
class Attribute:
    r"""
    An attribute (column definition) entry of a worklist.

    .. raw:: latex

        \begin{multicols}{2}

    :param attribute_type: The attribute type identifier.
    :param field_type: The field type identifier.

    .. raw:: latex

        \end{multicols}

    .. clearpage::
    """

    attribute_id: int = attr.ib(converter=int)
    attribute_type: AttributeType = attr.ib(converter=AttributeType)
    """
    The attribute type identifier.

    Can be System Defined (``0``), System Used (``1``), or User Added (``2``).
    """

    field_type: int = attr.ib(converter=int)
    """
    The field type identifier.

    Each of the system defined columns have a field type starting from sampleid = 0 to reserved6 = 24.
    The system used column can be 'compound param' = 35, 'optim param' = 36,
    'mass param' = 37 and 'protein param' = 38.
    The User added columns start from 45.

    .. clearpage::
    """

    system_name: str = attr.ib(converter=strip_string)
    header_name: str = attr.ib(converter=strip_string)

    # TODO: determine data_type and use it to cast the values and the default value
    # Perhaps
    # DataFileValuedata_type = bdict(
    #         Unspecified=0,
    #         Byte=1,
    #         Int16=2,
    #         Int32=3,
    #         Int64=4,
    #         Float32=5,
    #         Float64=6,
    #         )
    data_type: Any = attr.ib(converter=int)
    default_data_value: str = attr.ib(converter=strip_string)
    reorder_id: int = attr.ib(converter=int)
    show_hide_status: bool = attr.ib(converter=element_to_bool)
    column_width: int = attr.ib(converter=int)

    # TODO: enum for output_data_type
    # NOTE(review): this class has no ``output_data_type`` field; the TODO may have been
    # meant for ``data_type`` or copied from another class -- confirm.

    @classmethod
    def from_xml(cls, element: lxml.objectify.ObjectifiedElement) -> "Attribute":
        """
        Build an :class:`~.Attribute` from an XML element.

        :param element: The XML element to parse the data from. One child element is read
            for each field of the class.
        """

        # (field name, XML child tag) pairs, in the order the children are read.
        field_tags = (
            ("attribute_id", "AttributeID"),
            ("attribute_type", "AttributeType"),
            ("field_type", "FieldType"),
            ("system_name", "SystemName"),
            ("header_name", "HeaderName"),
            ("data_type", "DataType"),
            ("default_data_value", "DefaultDataValue"),
            ("reorder_id", "ReorderID"),
            ("show_hide_status", "ShowHideStatus"),
            ("column_width", "ColumnWidth"),
        )

        return cls(**{field: getattr(element, tag) for field, tag in field_tags})