# Source code for mh_utils.worklist_parser.classes

#  !/usr/bin/env python
#
#  classes.py
"""
Main classes for the worklist parser.
"""
#
#  Copyright © 2020-2021 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
#  Permission is hereby granted, free of charge, to any person obtaining a copy
#  of this software and associated documentation files (the "Software"), to deal
#  in the Software without restriction, including without limitation the rights
#  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#  copies of the Software, and to permit persons to whom the Software is
#  furnished to do so, subject to the following conditions:
#
#  The above copyright notice and this permission notice shall be included in all
#  copies or substantial portions of the Software.
#
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
#  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
#  DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
#  OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
#  OR OTHER DEALINGS IN THE SOFTWARE.
#

# stdlib
from pprint import pformat
from typing import Any, Dict, List, Optional, Sequence, Union
from uuid import UUID

# 3rd party
import attr
import lxml.etree  # type: ignore
import pandas  # type: ignore
from attr_utils.docstrings import add_attrs_doc
from attr_utils.serialise import serde
from domdf_python_tools.bases import Dictable
from domdf_python_tools.doctools import prettify_docstrings

# this package
from mh_utils.utils import element_to_bool, strip_string
from mh_utils.worklist_parser.columns import Column, columns
from mh_utils.worklist_parser.enums import AttributeType
from mh_utils.worklist_parser.parser import parse_params, parse_sample_info
from mh_utils.xml import XMLFileMixin

# Public names of this module.
__all__ = ["JobData", "Worklist", "Checksum", "Macro", "Attribute"]

# Overrides the module name reported by ``pandas.DataFrame`` at import time.
# NOTE(review): presumably so documentation cross-references resolve to
# ``pandas.DataFrame`` rather than the class's internal module path -- confirm.
pandas.DataFrame.__module__ = "pandas"


class JobData(Dictable):
	"""
	Represents an entry in the worklist.

	:param id: The ID of the job. Anything that is not already a :class:`uuid.UUID`
		is converted with ``UUID(str(id))``.
	:param job_type: The type of job. TODO: enum of values
	:param run_status: The status of the analysis.  TODO: enum of values
	:param sample_info: Optional ``key: value`` mapping of information about the sample.
		A falsy value (:py:obj:`None` or an empty mapping) is replaced with a new empty dictionary.
	"""

	__slots__ = ["id", "job_type", "run_status", "sample_info"]

	def __init__(
			self,
			id: Union[str, UUID],  # noqa: A002  # pylint: disable=redefined-builtin
			job_type: int,
			run_status: int,
			sample_info: Optional[dict] = None,
			):

		super().__init__()

		# Keep an existing UUID untouched; otherwise parse the string form of the value.
		self.id = id if isinstance(id, UUID) else UUID(str(id))

		self.job_type = int(job_type)
		self.run_status = int(run_status)

		# Falls back to a fresh dict so instances never share a default mapping.
		self.sample_info = sample_info or {}

	# dtypes
	# 8: Str
	# Inj Vol, Dilution and Equilib Time (min) 5

	@classmethod
	def from_xml(
			cls,
			element: lxml.objectify.ObjectifiedElement,
			user_columns: Optional[Dict[str, Column]] = None,
			) -> "JobData":
		"""
		Construct a :class:`~.JobData` object from an XML element.

		Reads the ``ID``, ``JobType``, ``RunStatus`` and ``SampleInfo`` children of ``element``.

		:param element: The XML element to parse the data from
		:param user_columns: Optional mapping of user column labels to
			:class:`~mh_utils.worklist_parser.columns.Column` objects.
		"""

		return cls(
				id=element.ID,
				job_type=element.JobType,
				run_status=element.RunStatus,
				sample_info=parse_sample_info(element.SampleInfo, user_columns),
				)

	@property
	def __dict__(self):
		# Dictionary view over the slots; ``id`` is rendered as a string,
		# every other slot is returned as stored.
		return {
				key: str(self.id) if key == "id" else getattr(self, key)
				for key in self.__slots__
				}

	def __repr__(self) -> str:
		# ``sample_info`` is left out of the representation.
		shown = (f"{key}={val!r}" for key, val in iter(self) if key != "sample_info")
		return f"{self.__class__.__name__}({', '.join(shown)})"


@prettify_docstrings
class Worklist(XMLFileMixin, Dictable):
	"""
	Class that represents an Agilent MassHunter worklist.

	:param version: WorklistInfo version number
	:param locked_run_mode: Flag to indicate whether the data was acquired in locked mode.
		In the XML file this is stored as Yes = -1. No = 0.
	:param instrument_name: The name of the instrument.
	:param params: Mapping of parameter names to values. TODO: Check
	:param user_columns: Mapping of user columns to ??? TODO
	:param jobs: The entries in the worklist.
	:param checksum: The checksum of the worklist file. The format is unknown.
	"""

	__slots__ = ["version", "user_columns", "jobs", "checksum", "locked_run_mode", "instrument_name", "params"]

	def __init__(
			self,
			version: float,
			locked_run_mode: bool,
			instrument_name: str,
			params: dict,
			user_columns: dict,
			jobs: Sequence[JobData],
			checksum: "Checksum",
			):

		super().__init__()

		self.version = float(version)
		self.locked_run_mode = bool(locked_run_mode)
		self.instrument_name = str(instrument_name)
		self.params = params
		self.user_columns = user_columns
		# Copied into a list so any sequence type may be passed in.
		self.jobs = list(jobs)
		self.checksum = checksum

	@property
	def __dict__(self):
		# Dictionary view over the slots, in the order declared in ``__slots__``.
		return {key: getattr(self, key) for key in self.__slots__}

	@classmethod
	def from_xml(cls, element: lxml.objectify.ObjectifiedElement) -> "Worklist":
		"""
		Construct a :class:`~.Worklist` object from an XML element.

		:param element: The XML element to parse the data from.

		:raises ValueError: If ``LockedRunMode`` is neither ``-1`` nor ``0``.
		"""

		version = float(element.Version)
		checksum = Checksum.from_xml(element.Checksum)

		WorklistInfo = element.WorklistInfo

		# The file encodes the flag as -1 (locked) or 0 (not locked); anything else is rejected.
		if WorklistInfo.LockedRunMode == -1:
			locked_run_mode = True
		elif WorklistInfo.LockedRunMode == 0:
			locked_run_mode = False
		else:
			raise ValueError("Unknown value for 'LockedRunMode'")

		instrument_name = str(WorklistInfo.Instrument)
		params = parse_params(WorklistInfo.Params)

		user_columns: Dict[str, Column] = {}

		for attribute_element in WorklistInfo.AttributeInformation.iterchildren("Attributes"):
			attribute = Attribute.from_xml(attribute_element)

			# Every attribute that is not system defined becomes a user column, keyed by its name.
			if attribute.attribute_type != AttributeType.SystemDefined:
				column = Column.from_attribute(attribute)
				user_columns[column.name] = column

		# Jobs are parsed after the attributes because they need the completed column mapping.
		jobs_list: List[JobData] = [
				JobData.from_xml(job, user_columns) for job in WorklistInfo.JobDataList.iterchildren("JobData")
				]

		return cls(
				version=version,
				locked_run_mode=locked_run_mode,
				instrument_name=instrument_name,
				params=params,
				user_columns=user_columns,
				jobs=jobs_list,
				checksum=checksum,
				)

	def __repr__(self) -> str:
		return f"{self.__class__.__name__}({pformat(dict(self))})"

	def as_dataframe(self) -> pandas.DataFrame:
		"""
		Returns the :class:`~.Worklist` as a :class:`pandas.DataFrame`.

		There is one row per job, and one column for each of the standard columns
		followed by each of the user columns.
		A :exc:`KeyError` propagates if a job's ``sample_info`` lacks one of the column labels.

		:rtype:

		.. clearpage::
		"""

		headers = list(columns) + list(self.user_columns)
		data = [[job.sample_info[header_label] for header_label in headers] for job in self.jobs]

		# TODO: Sort columns by "reorder_id"

		return pandas.DataFrame(data, columns=headers)


@serde
@add_attrs_doc
@attr.s(slots=True)
class Checksum:
	"""
	Represents a checksum for a worklist.

	The format of the checksum is unknown.
	"""

	# Field names mirror the XML attribute names they are read from in ``from_xml``.
	SchemaVersion: int = attr.ib(converter=int)
	ALGO_VERSION: int = attr.ib(converter=int)
	HASHCODE: str = attr.ib(converter=str)

	@classmethod
	def from_xml(cls, element: lxml.objectify.ObjectifiedElement) -> "Checksum":
		"""
		Construct a :class:`~.Checksum` object from an XML element.

		``SchemaVersion`` and ``ALGO_VERSION`` are read from the attributes of ``element`` itself;
		``HASHCODE`` is read from the attributes of its ``MAIN`` child.

		:param element: The XML element to parse the data from.
		"""

		own_attributes = element.attrib

		return cls(
				SchemaVersion=own_attributes["SchemaVersion"],
				ALGO_VERSION=own_attributes["ALGO_VERSION"],
				HASHCODE=element.MAIN.attrib["HASHCODE"],
				)


@serde
@add_attrs_doc
@attr.s(slots=True, repr=False)
class Macro:
	"""
	Represents a macro in a worklist.

	:param output_parameter: .
	"""

	project_name: str = attr.ib(converter=strip_string)
	procedure_name: str = attr.ib(converter=strip_string)
	input_parameter: str = attr.ib(converter=strip_string)
	output_data_type: int = attr.ib(converter=int)
	output_parameter: str = attr.ib(converter=strip_string)
	"""
	.. clearpage::
	"""

	display_string: str = attr.ib(converter=strip_string)

	# TODO: enum for output_data_type

	@classmethod
	def from_xml(cls, element: lxml.objectify.ObjectifiedElement) -> "Macro":
		"""
		Construct a :class:`~.Macro` object from an XML element.

		:param element: The XML element to parse the data from.
		"""

		return cls(
				project_name=element.ProjectName,
				procedure_name=element.ProcedureName,
				input_parameter=element.InputParameter,
				output_data_type=element.OutputDataType,
				output_parameter=element.OutputParameter,
				display_string=element.DisplayString,
				)

	@property
	def undefined(self) -> bool:
		"""
		Returns whether the macro is undefined.

		A macro is undefined when every text field is an empty string
		and ``output_data_type`` is ``0``.
		"""

		text_fields = (
				self.project_name,
				self.procedure_name,
				self.input_parameter,
				self.output_parameter,
				self.display_string,
				)

		return self.output_data_type == 0 and all(value == '' for value in text_fields)

	def __repr__(self) -> str:
		class_name = self.__class__.__name__

		if self.undefined:
			return f"{class_name}(Undefined)"

		slots = self.__slots__  # type: ignore[attr-defined]  # attrs adds __slots__ but mypy doesn't know
		# ``__weakref__`` is a slot entry, not a field, so it is skipped.
		values = ", ".join(f"{name}={getattr(self, name)!r}" for name in slots if name != "__weakref__")
		return f"{class_name}({values})"


@serde
@add_attrs_doc
@attr.s(slots=True)
class Attribute:
	r"""
	Represents an Attribute.

	.. raw:: latex

		\begin{multicols}{2}

	:param attribute_type: The attribute type identifier.
	:param field_type: The field type identifier.

	.. raw:: latex

		\end{multicols}

	.. clearpage::

	"""

	attribute_id: int = attr.ib(converter=int)

	attribute_type: AttributeType = attr.ib(converter=AttributeType)
	"""
	The attribute type identifier.

	Can be System Defined (``0``), System Used (``1``), or User Added (``2``).
	"""

	field_type: int = attr.ib(converter=int)
	"""
	The field type identifier.

	Each of the system defined columns have a field type starting from sampleid = 0 to reserved6 = 24.

	The system used column can be 'compound param' = 35, 'optim param' = 36,
	'mass param' = 37 and 'protein param' = 38.

	The User added columns start from 45.

	.. clearpage::
	"""

	system_name: str = attr.ib(converter=strip_string)
	header_name: str = attr.ib(converter=strip_string)

	# TODO: determine data_type and use it to cast the values and the default value
	# Perhaps
	# DataFileValuedata_type = bdict(
	# Unspecified=0,
	# Byte=1,
	# Int16=2,
	# Int32=3,
	# Int64=4,
	# Float32=5,
	# Float64=6,
	# )
	data_type: Any = attr.ib(converter=int)

	default_data_value: str = attr.ib(converter=strip_string)
	reorder_id: int = attr.ib(converter=int)
	show_hide_status: bool = attr.ib(converter=element_to_bool)
	column_width: int = attr.ib(converter=int)

	# TODO: enum for output_data_type
	# NOTE(review): this class has no ``output_data_type`` field; the TODO above
	# may have been carried over from ``Macro`` and could refer to ``data_type`` -- confirm.

	@classmethod
	def from_xml(cls, element: lxml.objectify.ObjectifiedElement) -> "Attribute":
		"""
		Construct an :class:`~.Attribute` object from an XML element.

		Each field is read from the child of ``element`` with the corresponding CamelCase name
		and passed through the converter declared on that field.

		:param element: The XML element to parse the data from.
		"""

		return cls(
				attribute_id=element.AttributeID,
				attribute_type=element.AttributeType,
				field_type=element.FieldType,
				system_name=element.SystemName,
				header_name=element.HeaderName,
				data_type=element.DataType,
				default_data_value=element.DefaultDataValue,
				reorder_id=element.ReorderID,
				show_hide_status=element.ShowHideStatus,
				column_width=element.ColumnWidth,
				)