diff --git a/README.rst b/README.rst index 7862443..ebd1ed6 100644 --- a/README.rst +++ b/README.rst @@ -27,21 +27,21 @@ HGVS position crossmapper This library provides an interface to convert (cross map) between different HGVS numbering_ systems. -Converting between the transcript oriented c. or n. and the genomic oriented g. +Converting between the transcript oriented ``c.`` or ``n.`` and the genomic oriented ``g.`` numbering systems can be difficult, especially when the transcript in question -resides on the complement strand. +resides on the complement strand. This library provides functions to convert from any HGVS +numbering system to standard (0-based) coordinates and vice versa. **Features:** -- Support for genomic positions to standard coordinates and vice versa. -- Support for noncoding positions to standard coordinates and vice versa. -- Support for coding positions to standard coordinates and vice versa. -- Support for protein positions to standard coordinates and vice versa. -- Basic classes for loci that can be used for genomic loci other than genes. +- Support for genomic (``g.``, ``m.``, ``o.``) positions to standard coordinates and vice versa. +- Support for noncoding (``n.``, ``r.``) positions to standard coordinates and vice versa. +- Support for coding (``c.``, ``r.``) positions to standard coordinates and vice versa. +- Support for protein (``p.``) positions to standard coordinates and vice versa. +- Basic classes that can be used for loci other than genes or transcripts. Please see ReadTheDocs_ for the latest documentation. - Quick start ----------- @@ -53,8 +53,8 @@ positions and coordinates. >>> from mutalyzer_crossmapper import Genomic >>> crossmap = Genomic() >>> crossmap.coordinate_to_genomic(0) - 1 - >>> crossmap.genomic_to_coordinate(1) + {'position': 1} + >>> crossmap.genomic_to_coordinate({'position': 1}) 0 On top of the functionality provided by the ``Genomic`` class, the @@ -67,8 +67,8 @@ positions and coordinates. 
>>> exons = [(5, 8), (14, 20), (30, 35), (40, 44), (50, 52), (70, 72)] >>> crossmap = NonCoding(exons) >>> crossmap.coordinate_to_noncoding(35) - (14, 1, 0) - >>> crossmap.noncoding_to_coordinate((14, 1)) + {'position': 14, 'offset': 1, 'region': ''} + >>> crossmap.noncoding_to_coordinate({'position': 14, 'offset': 1, 'region': ''}) 35 Add the flag ``inverted=True`` to the constructor when the transcript resides @@ -84,8 +84,8 @@ coordinates as well as conversions between protein positions and coordinates. >>> cds = (32, 43) >>> crossmap = Coding(exons, cds) >>> crossmap.coordinate_to_coding(31) - (-1, 0, -1, 0) - >>> crossmap.coding_to_coordinate((-1, 0, -1)) + {'position': 1, 'offset': 0, 'region': '-'} + >>> crossmap.coding_to_coordinate({'position':1, 'offset':0, 'region':'-'}) 31 Again, the flag ``inverted=True`` can be used for transcripts that reside on @@ -96,10 +96,11 @@ Conversions between protein positions and coordinates are done as follows. .. code:: python >>> crossmap.coordinate_to_protein(41) - (2, 2, 0, 0, 0) - >>> crossmap.protein_to_coordinate((2, 2, 0, 0)) + {'position': 2, 'position_in_codon': 2, 'offset': 0, 'region': ''} + >>> crossmap.protein_to_coordinate({'position': 2, 'position_in_codon': 2, 'offset': 0, 'region': ''}) 41 + .. _numbering: http://varnomen.hgvs.org/bg-material/numbering/ .. 
_ReadTheDocs: https://mutalyzer-crossmapper.readthedocs.io diff --git a/docs/conf.py b/docs/conf.py index d87866b..3dbdd7d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -7,5 +7,8 @@ release = _get_metadata('Version') autoclass_content = 'both' -extensions = ['sphinx.ext.autodoc'] +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.intersphinx' + ] master_doc = 'index' diff --git a/docs/library.rst b/docs/library.rst index 6ef095a..39681ab 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -8,21 +8,38 @@ The ``Genomic`` class --------------------- The ``Genomic`` class provides an interface to conversions between genomic -positions and coordinates. +(``g.``, ``m.``, ``o.``) positions and coordinates. + +Genomic Position Model +~~~~~~~~~~~~~~~~~~~~~~~ + +Genomic positions follow the HGVS genomic coordinate system. +They are represented as 1-key dictionaries. Below is an example of ``g.1`` in HGVS. + +.. code-block:: python + + {'position': 1} + +Where: + +- **position**: an integer representing a nucleotide position (> 0) + +Genomic Position Conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: python >>> from mutalyzer_crossmapper import Genomic >>> crossmap = Genomic() -The functions ``coordinate_to_genomic()`` and ``genomic_to_coordinate`` can be +The functions ``coordinate_to_genomic()`` and ``genomic_to_coordinate()`` can be used to convert to and from genomic positions. .. code:: python >>> crossmap.coordinate_to_genomic(0) - 1 - >>> crossmap.genomic_to_coordinate(1) + {'position': 1} + >>> crossmap.genomic_to_coordinate({'position': 1}) 0 See section :doc:`api/crossmap` for a detailed description. @@ -32,8 +49,33 @@ The ``NonCoding`` class On top of the functionality provided by the ``Genomic`` class, the ``NonCoding`` class provides an interface to conversions between noncoding -positions and coordinates. Conversions between positioning systems should be -done via a coordinate. +(``n.``, ``r.``) positions and coordinates. 
Conversions between positioning +systems should be done via a coordinate. + +NonCoding Position Model +~~~~~~~~~~~~~~~~~~~~~~~~ + +Noncoding positions follow the HGVS ``n.`` coordinate system. They are represented +as 3-key dictionaries. Below is an example of ``n.14+1`` in HGVS. + +.. code-block:: python + + { + 'position': 14, + 'offset': 1, + 'region': '' + } + +Where: + +- **position**: an integer representing a nucleotide position (> 0) +- **offset**: an integer indicating the offset relative to the position (negative for upstream, + positive for downstream) +- **region**: a string describing the region type (empty for positions within a non-coding + transcript, ``u`` for upstream, ``d`` for downstream) + +NonCoding Position Conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: python @@ -41,69 +83,129 @@ done via a coordinate. >>> exons = [(5, 8), (14, 20), (30, 35), (40, 44), (50, 52), (70, 72)] >>> crossmap = NonCoding(exons) -Now the functions ``coordinate_to_noncoding()`` and -``noncoding_to_coordinate()`` can be used. These functions use a 3-tuple to -represent a noncoding position. +Now the functions ``coordinate_to_noncoding()`` and ``noncoding_to_coordinate()`` +can be used. -.. _table_noncoding: -.. list-table:: Noncoding positions. - :header-rows: 1 - - * - index - - description - * - 0 - - Transcript position. - * - 1 - - Offset. - * - 2 - - Upstream or downstream offset. - -In our example, the HGVS position "g.36" (coordinate ``35``) is equivalent to -position "n.14+1". We can convert between these two as follows. +In our example, the HGVS position ``g.36`` (coordinate *35*) is equivalent to +position ``n.14+1``. We can convert between these two as follows. .. 
code:: python >>> crossmap.coordinate_to_noncoding(35) - (14, 1, 0) + {'position': 14, 'offset': 1, 'region': ''} + >>> crossmap.noncoding_to_coordinate({'position': 14, 'offset': 1, 'region': ''}) + 35 -When the coordinate is upstream or downstream of the transcript, the last -element of the tuple denotes the offset with respect to the transcript. This -makes it possible to distinguish between intronic positions and those outside -of the transcript. +When the coordinate is upstream or downstream of the transcript, we use ``u`` to +denote upstream and ``d`` to denote downstream. .. code:: python >>> crossmap.coordinate_to_noncoding(2) - (1, -3, -3) + {'position': 1, 'offset': -3, 'region': 'u'} + >>> crossmap.noncoding_to_coordinate({'position': 1, 'offset': -3, 'region': 'u'}) + 2 >>> crossmap.coordinate_to_noncoding(73) - (22, 2, 2) - -Note that this last element is optional (and ignored) when a conversion to a -coordinate is requested. - - >>> crossmap.noncoding_to_coordinate((14, 1)) - 35 + {'position': 22, 'offset': 2, 'region': 'd'} + >>> crossmap.noncoding_to_coordinate({'position': 22, 'offset': 2, 'region': 'd'}) + 73 For transcripts that reside on the reverse complement strand, the ``inverted`` -parameter should be set to ``True``. In our example, HGVS position "g.36" -(coordinate ``35``) is now equivalent to position "n.9-1". +parameter should be set to ``True``. In our example, HGVS position ``g.36`` +(coordinate *35*) is now equivalent to position ``n.9-1``. .. code:: python >>> crossmap = NonCoding(exons, inverted=True) >>> crossmap.coordinate_to_noncoding(35) - (9, -1, 0) - >>> crossmap.noncoding_to_coordinate((9, -1)) + {'position': 9, 'offset': -1, 'region': ''} + >>> crossmap.noncoding_to_coordinate({'position': 9, 'offset': -1, 'region': ''}) 35 +In the following table, we show a number of annotated examples. + +.. _table_noncoding: +.. list-table:: Coordinates to Noncoding Positions mapping. 
+ :header-rows: 1 + + * - coordinate + - position + - offset + - region + - HGVS + * - 0 + - 1 + - -5 + - ``u`` + - ``n.u5`` + * - 4 + - 1 + - -1 + - ``u`` + - ``n.u1`` + * - 5 + - 1 + - 0 + - + - ``n.1`` + * - 24 + - 9 + - 5 + - + - ``n.9+5`` + * - 25 + - 10 + - -5 + - + - ``n.10-5`` + * - 71 + - 22 + - 0 + - + - ``n.22`` + * - 72 + - 22 + - 1 + - ``d`` + - ``n.d1`` + * - 79 + - 22 + - 8 + - ``d`` + - ``n.d8`` + See section :doc:`api/crossmap` for a detailed description. The ``Coding`` class -------------------- The ``Coding`` class provides an interface to all conversions between -positioning systems and coordinates. Conversions between positioning systems -should be done via a coordinate. +coding (``c.``, ``r.``) positions and coordinates. Conversions between +positioning systems should be done via a coordinate. + +Coding Position Model +~~~~~~~~~~~~~~~~~~~~~ +Coding positions follow the HGVS ``c.`` coordinate system. They are +represented as 3-key dictionaries. Here is an example of ``c.*1+3``. + +.. code-block:: python + + { + 'position': 1, + 'offset': 3, + 'region': '*' + } + +Where: + +- **position**: an integer representing a transcript position (> 0) +- **offset**: an integer indicating the offset relative to the position (negative for upstream, + positive for downstream) +- **region**: a string describing the region type (empty for positions within coding DNA sequence, + ``-`` for 5' UTR, ``*`` for 3' UTR, ``u`` for upstream and ``d`` for downstream) + +Coding Position Conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: python @@ -114,131 +216,236 @@ should be done via a coordinate. On top of the functionality provided by the ``NonCoding`` class, the functions ``coordinate_to_coding()`` and ``coding_to_coordinate()`` can be used. These -functions use a 4-tuple to represent a coding position. - -.. list-table:: Coding positions. - :header-rows: 1 - - * - index - - description - * - 0 - - Transcript position. - * - 1 - - Offset. - * - 2 - - Region. 
- * - 3 - - Upstream or downstream offset. - -The region denotes the location of the position with respect to the CDS. This -is needed in order to work with the HGVS "-" and "*" positions. - -.. list-table:: Coding position regions. - :header-rows: 1 +functions use a 3-key dictionary to represent a coding position. - * - value - - description - - HGVS example - * - ``-1`` - - Upstream of the CDS. - - "c.-10" - * - ``0`` - - In the CDS. - - "c.1" - * - ``1`` - - Downstream of the CDS. - - "c.*10" - -In our example, the HGVS position "g.32" (coordinate ``31``) is equivalent to -position "c.-1". We can convert between these two as follows. +In our example, the HGVS position ``g.32`` (coordinate *31*) is equivalent to +position ``c.-1``. We can convert between these two as follows. .. code:: python >>> crossmap.coordinate_to_coding(31) - (-1, 0, -1, 0) - >>> crossmap.coding_to_coordinate((-1, 0, -1)) + {'position': 1, 'offset': 0, 'region': '-'} + >>> crossmap.coding_to_coordinate({'position': 1, 'offset': 0, 'region': '-'}) 31 The ``coordinate_to_coding()`` function accepts an optional ``degenerate`` argument. When set to ``True``, positions outside of the transcript are no -longer described using the offset notation. +longer described using the ``u`` or ``d`` notation, ``-`` and ``*`` are used +instead. Note that the values of ``position`` and ``offset`` are adjusted accordingly. .. code:: python >>> crossmap.coordinate_to_coding(4) - (-11, -1, -1, -1) + {'position': 11, 'offset': -1, 'region': 'u'} >>> crossmap.coordinate_to_coding(4, True) - (-12, 0, -1, -1) + {'position': 12, 'offset': 0, 'region': '-'} -Additionally, the functions ``coordinate_to_protein()`` and -``protein_to_coordinate()`` can be used. These functions use a 5-tuple to -represent a protein position. +In the following table, we show a number of annotated examples. -.. list-table:: Protein positions. +.. _table_coding: +.. 
list-table:: Coordinates to Coding Positions mapping :header-rows: 1 - * - index - - description + * - coordinate + - position + - offset + - region + - HGVS * - 0 - - Protein position. - * - 1 - - Codon position. - * - 2 - - Offset. - * - 3 - - Region. + - 11 + - -5 + - ``u`` + - ``c.u5`` * - 4 - - Upstream or downstream offset. + - 11 + - -1 + - ``u`` + - ``c.u1`` + * - 5 + - 11 + - 0 + - ``-`` + - ``c.-11`` + * - 24 + - 3 + - 5 + - ``-`` + - ``c.-3+5`` + * - 31 + - 1 + - 0 + - ``-`` + - ``c.-1`` + * - 32 + - 1 + - 0 + - + - ``c.1`` + * - 37 + - 3 + - 3 + - + - ``c.3+3`` + * - 38 + - 4 + - -2 + - + - ``c.4-2`` + * - 43 + - 1 + - 0 + - ``*`` + - ``c.*1`` + * - 61 + - 4 + - -9 + - ``*`` + - ``c.*4-9`` + * - 71 + - 5 + - 0 + - ``*`` + - ``c.*5`` + * - 72 + - 5 + - 1 + - ``d`` + - ``c.d1`` + * - 79 + - 5 + - 8 + - ``d`` + - ``c.d8`` + + +Protein +------- + +Additionally, the functions ``coordinate_to_protein()`` and +``protein_to_coordinate()`` can be used. These functions use a 4-key dictionary +to represent a protein position. Here is one of the three possible representations +of ``p.1`` in HGVS (one for each nucleotide in the codon). + +.. code-block:: python + + { + 'position': 1, + 'position_in_codon': 3, + 'offset': 0, + 'region': '' + } -In our example the HGVS position "g.42" (coordinate ``41``) corresponds with -position "p.2". We can convert between these to as follows. +Where: + +- **position**: an integer representing an amino acid position (> 0) +- **position_in_codon**: an integer indexing the position in a codon (1, 2, or 3) +- **offset**: an integer indicating the offset relative to the nucleotide specified by ``position_in_codon`` in the codon +- **region**: a string describing the region type (empty for valid amino acid positions) + +In our example, the HGVS position ``g.42`` (coordinate *41*) corresponds with +position ``p.2``. We can convert between these two as follows. .. 
code:: python >>> crossmap.coordinate_to_protein(41) - (2, 2, 0, 0, 0) - >>> crossmap.protein_to_coordinate((2, 2, 0, 0)) + {'position': 2, 'position_in_codon': 2, 'offset': 0, 'region': ''} + >>> crossmap.protein_to_coordinate({'position': 2, 'position_in_codon': 2, 'offset': 0, 'region': ''}) 41 -Note that the protein position only corresponds with the HGVS "p." notation -when the offset equals ``0`` and the region equals ``1``. In the following +**Note:** a protein position only corresponds with the HGVS "p." notation +when the offset equals ``0`` and the region is empty. In the following table, we show a number of annotated examples. -.. list-table:: Protein positions examples. +.. _table_protein: +.. list-table:: Coordinates to Protein Positions mapping :header-rows: 1 * - coordinate - - protein position - - description - - HGVS position - * - ``4`` - - ``(-4, 2, -1, -1, -1)`` - - Upstream position. - - invalid - * - ``31`` - - ``(-1, 3, 0, -1, 0)`` - - 5' UTR position. - - invalid - * - ``36`` - - ``(1, 3, 2, 0, 0)`` - - Intronic position. - - invalid - * - ``40`` - - ``(2, 1, 0, 0, 0)`` - - Second amino acid, first nucleotide. - - "p.2" - * - ``41`` - - ``(2, 2, 0, 0, 0)`` - - Second amino acid, second nucleotide. - - "p.2" - * - ``43`` - - ``(1, 1, 0, 1, 0)`` - - 3' UTR position. - - invalid - * - ``43`` - - ``(2, 2, 2, 1, 2)`` - - Downstream position. 
- - invalid + - position + - position_in_codon + - offset + - region + - HGVS + * - 0 + - 4 + - 2 + - -5 + - ``u`` + - + * - 4 + - 4 + - 2 + - -1 + - ``u`` + - + * - 5 + - 4 + - 2 + - 0 + - ``-`` + - + * - 31 + - 1 + - 3 + - 0 + - ``-`` + - + * - 32 + - 1 + - 1 + - 0 + - + - ``p.1`` + * - 33 + - 1 + - 2 + - 0 + - + - ``p.1`` + * - 34 + - 1 + - 3 + - 0 + - + - ``p.1`` + * - 35 + - 1 + - 3 + - 1 + - + - + * - 42 + - 2 + - 3 + - 0 + - + - ``p.2`` + * - 43 + - 1 + - 1 + - 0 + - ``*`` + - + * - 44 + - 1 + - 1 + - 1 + - ``*`` + - + * - 72 + - 2 + - 2 + - 1 + - ``d`` + - + + * - 79 + - 2 + - 2 + - 8 + - ``d`` + - + See section :doc:`api/crossmap` for a detailed description. @@ -276,10 +483,10 @@ The ``Coding`` class makes use of a number of basic classes described in this section. The ``Locus`` class -^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~~~ The ``Locus`` class is used to deal with offsets with respect to a single -locus. +locus. .. code:: python @@ -288,13 +495,19 @@ locus. This class provides the functions ``to_position()`` and ``to_coordinate()`` for converting from a locus position to a coordinate and vice versa. These -functions work with a 2-tuple, see the section about `The NonCoding class`_ +functions work with a 2-key dictionary, see the section about `The NonCoding class`_ for the semantics. +**Note:** the ``position`` values in the position dictionaries are **0-based**, +so the first base of the locus corresponds to ``{'position': 0, 'offset': 0}``. +This differs from HGVS numbering, which is **1-based**. + .. code:: python >>> locus.to_position(9) - (1, -1) + {'position': 0, 'offset': -1} + >>> locus.to_coordinate({'position': 0, 'offset': -1}) + 9 For loci that reside on the reverse complement strand, the optional ``inverted`` constructor parameter should be set to ``True``. @@ -312,13 +525,20 @@ loci. 
>>> from mutalyzer_crossmapper import MultiLocus >>> multilocus = MultiLocus([(10, 20), (40, 50)]) -The interface to this class is similar to that of the ``Locus`` class. +The interface to this class is similar to that of the ``Locus`` class. Functions +``to_position()`` and ``to_coordinate()`` work with a 3-key dictionary. + +**Note:** again, the ``position`` values in the position dictionaries are **0-based**. .. code:: python >>> multilocus.to_position(22) - (10, 3) + {'position': 9, 'offset': 3, 'region': ''} + >>> multilocus.to_coordinate({'position': 9, 'offset': 3, 'region': ''}) + 22 >>> multilocus.to_position(38) - (11, -2) + {'position': 10, 'offset': -2, 'region': ''} + >>> multilocus.to_coordinate({'position': 10, 'offset': -2, 'region': ''}) + 38 See section :doc:`api/multi_locus` for a detailed description. diff --git a/mutalyzer_crossmapper/__init__.py b/mutalyzer_crossmapper/__init__.py index 284f6c8..fc079a8 100644 --- a/mutalyzer_crossmapper/__init__.py +++ b/mutalyzer_crossmapper/__init__.py @@ -1,33 +1,19 @@ -"""Crossmapper position conversion library. - -Definitions: - -- Coordinates are zero based, non-negative integers. -- Locations are zero based right-open non-negative integer intervals, - consistent with Python's range() and sequence slicing functions. -- Loci and exons are locations. -- An exon list is a list of locations that, when flattened, is an increasing - sequence. -- A position is a 2-tuple of which the first element is a one based non-zero - integer relative to an element in a location and the second element is an - integer offset relative to the first element. 
-""" -from pkg_resources import get_distribution +from importlib.metadata import metadata from .crossmapper import Coding, Genomic, NonCoding from .location import nearest_location from .locus import Locus from .multi_locus import MultiLocus +from .models import GenomicPoint, NonCodingPoint, CodingPoint, ProteinPoint -def _get_metadata(name): - pkg = get_distribution('mutalyzer_crossmapper') - - for line in pkg.get_metadata_lines(pkg.PKG_INFO): - if line.startswith('{}: '.format(name)): - return line.split(': ')[1] - - return '' +def _get_metadata(name: str) -> str: + """Get metadata from the package using importlib.metadata""" + try: + meta = metadata('mutalyzer_crossmapper') + return meta.get(name, '') + except Exception: + return '' _copyright_notice = 'Copyright (c) {} <{}>'.format( diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 537efd3..42efc5b 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -1,30 +1,33 @@ from .multi_locus import MultiLocus +from .models import GenomicPoint, NonCodingPoint, CodingPoint, ProteinPoint class Genomic(object): """Genomic crossmap object.""" - def coordinate_to_genomic(self, coordinate): - """Convert a coordinate to a genomic position (g./m./o.). + + def coordinate_to_genomic(self, coordinate: int) -> GenomicPoint: + """Convert a coordinate to a genomic point model (g./m./o.). :arg int coordinate: Coordinate. - :returns int: Genomic position. + :returns GenomicPoint: Genomic point model. """ - return coordinate + 1 + return GenomicPoint(coordinate + 1) - def genomic_to_coordinate(self, position): - """Convert a genomic position (g./m./o.) to a coordinate. + def genomic_to_coordinate(self, point: GenomicPoint) -> int: + """Convert a genomic point (g./m./o.) to a coordinate. - :arg int position: Genomic position. + :arg GenomicPoint point: Genomic point model. :returns int: Coordinate. 
""" - return position - 1 + return point.position - 1 class NonCoding(Genomic): """NonCoding crossmap object.""" - def __init__(self, locations, inverted=False): + + def __init__(self, locations: list[tuple[int, int]], inverted: bool = False) -> None: """ :arg list locations: List of locus locations. :arg bool inverted: Orientation. @@ -33,33 +36,44 @@ def __init__(self, locations, inverted=False): self._noncoding = MultiLocus(locations, inverted) - def coordinate_to_noncoding(self, coordinate): - """Convert a coordinate to a noncoding position (n./r.). + def coordinate_to_noncoding(self, coordinate: int) -> NonCodingPoint: + """Convert a coordinate to a noncoding point model (n./r.). :arg int coordinate: Coordinate. - :returns tuple: Noncoding position. + :returns NonCodingPoint: Noncoding point model. """ - pos = self._noncoding.to_position(coordinate) - - return pos[0] + 1, pos[1], pos[2] + point = self._noncoding.to_position(coordinate) + return NonCodingPoint( + position=point.position + 1, + offset=point.offset, + region=point.region + ) - def noncoding_to_coordinate(self, position): - """Convert a noncoding position (n./r.) to a coordinate. + def noncoding_to_coordinate(self, point: NonCodingPoint) -> int: + """Convert a noncoding point (n./r.) to a coordinate. - :arg tuple position: Noncoding position. + :arg NonCodingPoint point: Noncoding point model. :returns int: Coordinate. """ - if position[0] > 0: - return self._noncoding.to_coordinate( - (position[0] - 1, position[1])) - return self._noncoding.to_coordinate(position) + return self._noncoding.to_coordinate( + NonCodingPoint( + position=point.position - 1, + offset=point.offset, + region=point.region + ) + ) class Coding(NonCoding): """Coding crossmap object.""" - def __init__(self, locations, cds, inverted=False): + def __init__( + self, + locations: list[tuple[int, int]], + cds: tuple[int, int], + inverted: bool = False + ) -> None: """ :arg list locations: List of locus locations. 
:arg tuple cds: Locus location. @@ -67,90 +81,182 @@ def __init__(self, locations, cds, inverted=False): """ NonCoding.__init__(self, locations, inverted) - b0 = self._noncoding.to_position(cds[0]) - b1 = self._noncoding.to_position(cds[1]) + cds_start = self._noncoding.to_position(cds[0]) + cds_end = self._noncoding.to_position(cds[1] - 1) + exon_start = self._noncoding.to_position(locations[0][0]) + exon_end = self._noncoding.to_position(locations[-1][1] - 1) if self._inverted: - self._coding = (b1[0] + b1[1] + 1, b0[0] + b0[1] + 1) - self._cds_len = (b0[0] + b0[1]) - (b1[0] + b1[1]) + self._coding = ( + cds_end.position + cds_end.offset, + cds_start.position + cds_start.offset + 1 + ) + self._exons = ( + exon_end.position + exon_end.offset, + exon_start.position + exon_start.offset + 1 + ) else: - self._coding = (b0[0] + b0[1], b1[0] + b1[1]) - self._cds_len = (b1[0] + b1[1]) - (b0[0] + b0[1]) - - def _coordinate_to_coding(self, coordinate): + self._coding = ( + cds_start.position + cds_start.offset, + cds_end.position + cds_end.offset + 1 + ) + self._exons = ( + exon_start.position + exon_start.offset, + exon_end.position + exon_end.offset + 1 + ) + + def _coordinate_to_coding(self, coordinate: int) -> CodingPoint: """Convert a coordinate to a coding position (c./r.). :arg int coordinate: Coordinate. - :returns tuple: Coding position (c./r.). + :returns CodingPoint: Coding position model (c./r.). 
""" - pos = self._noncoding.to_position(coordinate) - - if pos[0] < self._coding[0]: - return pos[0] - self._coding[0], pos[1], -1, pos[2] - elif pos[0] >= self._coding[1]: - return pos[0] - self._coding[1] + 1, pos[1], 1, pos[2] - return pos[0] - self._coding[0] + 1, pos[1], 0, pos[2] + noncoding_point = self._noncoding.to_position(coordinate) + + position = noncoding_point.position + offset = noncoding_point.offset + region = noncoding_point.region + + if region == 'u': + if self._exons[0] == self._coding[0]: + position = 1 + else: + position = self._coding[0] + elif region == 'd': + if self._exons[1] == self._coding[1]: + position = self._coding[1] - self._coding[0] + else: + position = position - self._coding[1] + 1 + elif position < self._coding[0]: + position = self._coding[0] - position + region = '-' + elif position >= self._coding[1]: + position = position - self._coding[1] + 1 + region = '*' + else: + position = position - self._coding[0] + 1 + region = '' + return CodingPoint(position=position, offset=offset, region=region) - def coordinate_to_coding(self, coordinate, degenerate=False): + def coordinate_to_coding(self, coordinate: int, degenerate: bool = False) -> CodingPoint: """Convert a coordinate to a coding position (c./r.). :arg int coordinate: Coordinate. :arg bool degenerate: Return a degenerate position. - :returns tuple: Coding position (c./r.). + :returns CodingPoint: Coding position model (c./r.). 
""" - pos = self._coordinate_to_coding(coordinate) - - if degenerate and pos[3]: - if pos[2] == 0: - if pos[0] == 1 and pos[1] < 0: - return pos[1], 0, -1, pos[3] - if pos[0] == self._cds_len and pos[1] > 0: - return pos[0] + pos[1] - self._cds_len, 0, 1, pos[3] - return pos[0] + pos[1], 0, pos[2], pos[3] - - return pos - - def coding_to_coordinate(self, position): + point = self._coordinate_to_coding(coordinate) + + region = point.region + if not degenerate: + return point + + if region == 'u': + if self._coding[0] == 0: + position = abs(point.offset) + else: + position = point.position + abs(point.offset) + return CodingPoint(position=position, offset=0, region='-') + if region == 'd': + if self._exons[1] == self._coding[1]: + position = abs(point.offset) + else: + position = point.position + abs(point.offset) + return CodingPoint(position=position, offset=0, region='*') + return point + + def coding_to_coordinate(self, point: CodingPoint) -> int: """Convert a coding position (c./r.) to a coordinate. - :arg tuple position: Coding position (c./r.). + :arg CodingPoint point: Coding position model (c./r.). :returns int: Coordinate. 
""" - if position[2] == -1: - return self._noncoding.to_coordinate( - (position[0] + self._coding[0], position[1])) - elif position[2] == 1: + + region = point.region + + if region in ('u', 'd'): + return self._noncoding.to_coordinate(point) + + position = point.position + if region == '': + noncoding_point = NonCodingPoint( + position=position + self._coding[0] - 1, + offset=point.offset, + region='' + ) + return self._noncoding.to_coordinate(noncoding_point) + + if region == '-': + if position <= self._coding[0]: + return self._noncoding.to_coordinate( + NonCodingPoint( + position=self._coding[0] - position, + offset=point.offset, + region='' + ) + ) return self._noncoding.to_coordinate( - (position[0] + self._coding[1] - 1, position[1])) - return self._noncoding.to_coordinate( - (position[0] + self._coding[0] - 1, position[1])) + NonCodingPoint( + position=0, + offset=point.offset + 1 - position, + region='u' + ) + ) - def coordinate_to_protein(self, coordinate): + return self._noncoding.to_coordinate( + NonCodingPoint( + position=self._coding[1] + position - 1, + offset=point.offset, + region='' + ) + ) + + def coordinate_to_protein(self, coordinate: int) -> ProteinPoint: """Convert a coordinate to a protein position (p.). :arg int coordinate: Coordinate. - :returns tuple: Protein position (p.). + :returns ProteinPoint: Protein position model(p.). 
""" - pos = self.coordinate_to_coding(coordinate) - - if pos[2] == -1: - return (pos[0] // 3, pos[0] % 3 + 1, *pos[1:]) - return ((pos[0] + 2) // 3, (pos[0] + 2) % 3 + 1, *pos[1:]) - - def protein_to_coordinate(self, position): + point = self.coordinate_to_coding(coordinate) + + position = point.position + if point.region in ('-', 'u'): + return ProteinPoint( + position=abs(-position // 3), + position_in_codon=-position % 3 + 1, + region=point.region, + offset=point.offset + ) + return ProteinPoint( + position=(position + 2) // 3, + position_in_codon=(position + 2) % 3 + 1, + region=point.region, + offset=point.offset + ) + + def protein_to_coordinate(self, point: ProteinPoint) -> int: """Convert a protein position (p.) to a coordinate. - :arg tuple position: Protein position (p.). + :arg ProteinPoint point: Protein position model(p.). :returns int: Coordinate. """ - if position[3] == -1: + if point.region in ('-', 'u'): return self.coding_to_coordinate( - (3 * position[0] + position[1] - 1, *position[2:])) - + CodingPoint( + position=3 * point.position - point.position_in_codon + 1, + offset=point.offset, + region=point.region + ) + ) return self.coding_to_coordinate( - (3 * position[0] + position[1] - 3, *position[2:])) + CodingPoint( + position=3 * point.position + point.position_in_codon - 3, + offset=point.offset, + region=point.region + ) + ) diff --git a/mutalyzer_crossmapper/location.py b/mutalyzer_crossmapper/location.py index e580672..9b534e4 100644 --- a/mutalyzer_crossmapper/location.py +++ b/mutalyzer_crossmapper/location.py @@ -1,4 +1,4 @@ -def _nearest_boundary(lb, rb, c, p): +def _nearest_boundary(lb: int, rb: int, c: int, p: int) -> int: """Find the boundary nearest to `c`. In case of a draw, the parameter `p` decides which one is chosen. @@ -19,7 +19,7 @@ def _nearest_boundary(lb, rb, c, p): return p -def nearest_location(ls, c, p=0): +def nearest_location(ls: list[tuple[int, int]], c: int, p: int = 0) -> int: """Find the location nearest to `c`. 
In case of a draw, the parameter `p` decides which index is chosen. diff --git a/mutalyzer_crossmapper/locus.py b/mutalyzer_crossmapper/locus.py index 14a9d20..207a6b6 100644 --- a/mutalyzer_crossmapper/locus.py +++ b/mutalyzer_crossmapper/locus.py @@ -1,6 +1,9 @@ +from .models import Point + + class Locus(object): """Locus object.""" - def __init__(self, location, inverted=False): + def __init__(self, location: tuple[int, int], inverted: bool = False) -> None: """ :arg tuple location: Locus location. :arg bool inverted: Orientation. @@ -10,33 +13,33 @@ def __init__(self, location, inverted=False): self.boundary = location[0], location[1] - 1 self._end = self.boundary[1] - self.boundary[0] - def to_position(self, coordinate): - """Convert a coordinate to a proper position. + def to_position(self, coordinate: int) -> Point: + """Convert a coordinate to a proper point model. :arg int coordinate: Coordinate. - :returns tuple: Position. + :returns Point: Position point with 'position' and 'offset'. """ if self._inverted: if coordinate > self.boundary[1]: - return 0, self.boundary[1] - coordinate + return Point(position=0, offset=self.boundary[1] - coordinate) if coordinate < self.boundary[0]: - return self._end, self.boundary[0] - coordinate - return self.boundary[1] - coordinate, 0 + return Point(position=self._end, offset=self.boundary[0] - coordinate) + return Point(position=self.boundary[1] - coordinate, offset=0) if coordinate < self.boundary[0]: - return 0, coordinate - self.boundary[0] + return Point(position=0, offset=coordinate - self.boundary[0]) if coordinate > self.boundary[1]: - return self._end, coordinate - self.boundary[1] - return coordinate - self.boundary[0], 0 + return Point(position=self._end, offset=coordinate - self.boundary[1]) + return Point(position=coordinate - self.boundary[0], offset=0) - def to_coordinate(self, position): - """Convert a position to a coordinate. 
+ def to_coordinate(self, point: Point) -> int: + """Convert a point model to a coordinate. - :arg int position: Position. + :arg Point point: Point model with 'position' and 'offset'. :returns int: Coordinate. """ if self._inverted: - return self.boundary[1] - position[0] - position[1] - return self.boundary[0] + position[0] + position[1] + return self.boundary[1] - point.position - point.offset + return self.boundary[0] + point.position + point.offset diff --git a/mutalyzer_crossmapper/models.py b/mutalyzer_crossmapper/models.py new file mode 100644 index 0000000..e47f06d --- /dev/null +++ b/mutalyzer_crossmapper/models.py @@ -0,0 +1,72 @@ +from __future__ import annotations +from dataclasses import dataclass + + +# Basic dataclass module for locus and multi_locus +@dataclass(slots=True) +class Point: + position: int + offset: int = 0 + region: str = '' + + +@dataclass(slots=True) +class GenomicPoint: + position: int + + def __post_init__(self) -> None: + self._validate_position(self.position) + + def __str__(self) -> str: + return f"{self.position}" + + @staticmethod + def _validate_position(position: int) -> None: + if not isinstance(position, int) or position < 0: + raise ValueError('position must be a non-negative integer') + + +@dataclass(slots=True) +class NonCodingPoint(GenomicPoint): + offset: int = 0 + region: str = '' + + allowed_regions = {'', 'u', 'd'} + + def __post_init__(self) -> None: + GenomicPoint.__post_init__(self) + self._validate_offset(self.offset) + self._validate_region(self.region) + + @staticmethod + def _validate_offset(offset: int) -> None: + if not isinstance(offset, int): + raise TypeError('offset must be an integer') + + def _validate_region(self, region: str) -> None: + if not isinstance(region, str) or region not in self.allowed_regions: + raise ValueError(f'region must be a string in {self.allowed_regions}') + + def __str__(self) -> str: + if self.offset == 0: + return f"{self.region}{self.position}" + return 
f"{self.region}{self.position}{self.offset:+}" + + +@dataclass(slots=True) +class CodingPoint(NonCodingPoint): + allowed_regions = {'', 'u', 'd', '-', '*'} + + +@dataclass(slots=True) +class ProteinPoint(CodingPoint): + position_in_codon: int = 1 + + def __post_init__(self) -> None: + CodingPoint.__post_init__(self) + self._validate_position_in_codon(self.position_in_codon) + + @staticmethod + def _validate_position_in_codon(position_in_codon: int) -> None: + if not isinstance(position_in_codon, int) or position_in_codon not in (1, 2, 3): + raise ValueError('position_in_codon must be 1, 2, or 3') diff --git a/mutalyzer_crossmapper/multi_locus.py b/mutalyzer_crossmapper/multi_locus.py index 9836549..133c9f2 100644 --- a/mutalyzer_crossmapper/multi_locus.py +++ b/mutalyzer_crossmapper/multi_locus.py @@ -3,9 +3,10 @@ from .location import nearest_location from .locus import Locus +from .models import Point -def _offsets(locations, orientation): +def _offsets(locations: list[tuple[int, int]], orientation: int) -> list[int]: """For each location, calculate the length of the preceding locations. :arg list locations: List of locations. @@ -19,7 +20,7 @@ def _offsets(locations, orientation): class MultiLocus(object): """MultiLocus object.""" - def __init__(self, locations, inverted=False): + def __init__(self, locations: list[tuple[int, int]], inverted: bool = False) -> None: """ :arg list locations: List of locus locations. :arg bool inverted: Orientation. @@ -31,12 +32,12 @@ def __init__(self, locations, inverted=False): self._orientation = -1 if inverted else 1 self._offsets = _offsets(locations, self._orientation) - def _direction(self, index): + def _direction(self, index: int) -> int: if self._inverted: return len(self._offsets) - index - 1 return index - def outside(self, coordinate): + def outside(self, coordinate: int) -> int: """Calculate the offset relative to this MultiLocus. :arg int coordinate: Coordinate. 
@@ -49,32 +50,49 @@ def outside(self, coordinate): return coordinate - self._loci[-1].boundary[1] return 0 - def to_position(self, coordinate): - """Convert a coordinate to a position. + def to_position(self, coordinate: int) -> Point: + """Convert a coordinate to a point model. :arg int coordinate: Coordinate. - :returns tuple: Position. + :returns Point: Point model with 'position', 'offset', and 'region'. """ index = nearest_location(self._locations, coordinate, self._inverted) outside = self._orientation * self.outside(coordinate) - location = self._loci[index].to_position(coordinate) + region = 'u' if outside < 0 else 'd' if outside > 0 else '' + point = self._loci[index].to_position(coordinate) + return Point( + position=point.position + self._offsets[self._direction(index)], + offset=point.offset, + region=region, + ) - return ( - location[0] + self._offsets[self._direction(index)], - location[1], - outside) + def to_coordinate(self, point: Point) -> int: + """Convert a point model to a coordinate. - def to_coordinate(self, position): - """Convert a position to a coordinate. - - :arg int position: Position. + :arg Point point: Point model with 'position', 'offset', and 'region'. :returns int: Coordinate. 
""" + region = point.region + + if region == 'u': + if self._inverted: + return self._locations[-1][1] - point.offset - 1 + return self._locations[0][0] + point.offset + if region == 'd': + if self._inverted: + return self._locations[0][0] - point.offset + return self._locations[-1][1] + point.offset - 1 + index = min( len(self._offsets), - max(0, bisect_right(self._offsets, position[0]) - 1)) - + max(0, bisect_right(self._offsets, point.position) - 1) + ) return self._loci[self._direction(index)].to_coordinate( - (position[0] - self._offsets[index], position[1])) + Point( + position=point.position - self._offsets[index], + offset=point.offset, + region=point.region, + ) + ) diff --git a/setup.cfg b/setup.cfg index ca7be54..c58c160 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,6 +17,7 @@ classifiers = [options] packages = find: +python_requires = >=3.10 [options.extras_require] tests = diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index dbe6b47..2b6a92d 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -1,4 +1,5 @@ from mutalyzer_crossmapper import Coding, Genomic, NonCoding +from mutalyzer_crossmapper.models import CodingPoint, GenomicPoint, NonCodingPoint, ProteinPoint from helper import degenerate_equal, invariant @@ -11,9 +12,17 @@ def test_Genomic(): crossmap = Genomic() invariant( - crossmap.coordinate_to_genomic, 0, crossmap.genomic_to_coordinate, 1) + crossmap.coordinate_to_genomic, + 0, + crossmap.genomic_to_coordinate, + GenomicPoint(position=1), + ) invariant( - crossmap.coordinate_to_genomic, 98, crossmap.genomic_to_coordinate, 99) + crossmap.coordinate_to_genomic, + 98, + crossmap.genomic_to_coordinate, + GenomicPoint(position=99), + ) def test_NonCoding(): @@ -22,19 +31,37 @@ def test_NonCoding(): # Boundary between upstream and transcript. 
invariant( - crossmap.coordinate_to_noncoding, 4, - crossmap.noncoding_to_coordinate, (1, -1, -1)) + crossmap.coordinate_to_noncoding, + 3, + crossmap.noncoding_to_coordinate, + NonCodingPoint(position=1, offset=-2, region='u'), + ) invariant( - crossmap.coordinate_to_noncoding, 5, - crossmap.noncoding_to_coordinate, (1, 0, 0)) + crossmap.coordinate_to_noncoding, + 4, + crossmap.noncoding_to_coordinate, + NonCodingPoint(position=1, offset=-1, region='u'), + ) + invariant( + crossmap.coordinate_to_noncoding, + 5, + crossmap.noncoding_to_coordinate, + NonCodingPoint(position=1, offset=0, region=''), + ) # Boundary between downstream and transcript. invariant( - crossmap.coordinate_to_noncoding, 71, - crossmap.noncoding_to_coordinate, (22, 0, 0)) + crossmap.coordinate_to_noncoding, + 71, + crossmap.noncoding_to_coordinate, + NonCodingPoint(position=22, offset=0, region=''), + ) invariant( - crossmap.coordinate_to_noncoding, 72, - crossmap.noncoding_to_coordinate, (22, 1, 1)) + crossmap.coordinate_to_noncoding, + 72, + crossmap.noncoding_to_coordinate, + NonCodingPoint(position=22, offset=1, region='d'), + ) def test_NonCoding_inverted(): @@ -43,19 +70,31 @@ def test_NonCoding_inverted(): # Boundary between upstream and transcript. invariant( - crossmap.coordinate_to_noncoding, 72, - crossmap.noncoding_to_coordinate, (1, -1, -1)) + crossmap.coordinate_to_noncoding, + 72, + crossmap.noncoding_to_coordinate, + NonCodingPoint(position=1, offset=-1, region='u'), + ) invariant( - crossmap.coordinate_to_noncoding, 71, - crossmap.noncoding_to_coordinate, (1, 0, 0)) + crossmap.coordinate_to_noncoding, + 71, + crossmap.noncoding_to_coordinate, + NonCodingPoint(position=1, offset=0, region=''), + ) # Boundary between downstream and transcript. 
invariant( - crossmap.coordinate_to_noncoding, 5, - crossmap.noncoding_to_coordinate, (22, 0, 0)) + crossmap.coordinate_to_noncoding, + 5, + crossmap.noncoding_to_coordinate, + NonCodingPoint(position=22, offset=0, region=''), + ) invariant( - crossmap.coordinate_to_noncoding, 4, - crossmap.noncoding_to_coordinate, (22, 1, 1)) + crossmap.coordinate_to_noncoding, + 4, + crossmap.noncoding_to_coordinate, + NonCodingPoint(position=22, offset=1, region='d'), + ) def test_NonCoding_degenerate(): @@ -64,13 +103,25 @@ def test_NonCoding_degenerate(): # Boundary between upstream and transcript. degenerate_equal( - crossmap.noncoding_to_coordinate, 4, - [(1, -1, -1), (-1, 0, -1)]) + crossmap.noncoding_to_coordinate, + 4, + [ + NonCodingPoint(position=1, offset=-1, region=''), + NonCodingPoint(position=1, offset=-1, region='u'), + ], + ) # Boundary between downstream and transcript. degenerate_equal( - crossmap.noncoding_to_coordinate, 72, - [(22, 1, 1), (23, 0, 1)]) + crossmap.noncoding_to_coordinate, + 72, + [ + NonCodingPoint(position=22, offset=1, region='d'), + NonCodingPoint(position=22, offset=1, region=''), + NonCodingPoint(position=23, offset=0, region=''), + NonCodingPoint(position=24, offset=-1, region=''), + ], + ) def test_NonCoding_inverted_degenerate(): @@ -79,13 +130,24 @@ def test_NonCoding_inverted_degenerate(): # Boundary between upstream and transcript. degenerate_equal( - crossmap.noncoding_to_coordinate, 72, - [(1, -1, -1), (-1, 0, -1)]) + crossmap.noncoding_to_coordinate, + 72, + [ + NonCodingPoint(position=1, offset=-1, region=''), + NonCodingPoint(position=1, offset=-1, region='u'), + ], + ) # Boundary between downstream and transcript. 
degenerate_equal( - crossmap.noncoding_to_coordinate, 4, - [(22, 1, 1), (23, 0, 1)]) + crossmap.noncoding_to_coordinate, + 4, + [ + NonCodingPoint(position=22, offset=1, region='d'), + NonCodingPoint(position=23, offset=0, region=''), + NonCodingPoint(position=22, offset=1, region=''), + ], + ) def test_Coding(): @@ -94,19 +156,31 @@ def test_Coding(): # Boundary between 5' and CDS. invariant( - crossmap.coordinate_to_coding, 31, - crossmap.coding_to_coordinate, (-1, 0, -1, 0)) + crossmap.coordinate_to_coding, + 31, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=0, region='-'), + ) invariant( - crossmap.coordinate_to_coding, 32, - crossmap.coding_to_coordinate, (1, 0, 0, 0)) + crossmap.coordinate_to_coding, + 32, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=0, region=''), + ) # Boundary between CDS and 3'. invariant( - crossmap.coordinate_to_coding, 42, - crossmap.coding_to_coordinate, (6, 0, 0, 0)) + crossmap.coordinate_to_coding, + 42, + crossmap.coding_to_coordinate, + CodingPoint(position=6, offset=0, region=''), + ) invariant( - crossmap.coordinate_to_coding, 43, - crossmap.coding_to_coordinate, (1, 0, 1, 0)) + crossmap.coordinate_to_coding, + 43, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=0, region='*'), + ) def test_Coding_inverted(): @@ -115,19 +189,31 @@ def test_Coding_inverted(): # Boundary between 5' and CDS. invariant( - crossmap.coordinate_to_coding, 43, - crossmap.coding_to_coordinate, (-1, 0, -1, 0)) + crossmap.coordinate_to_coding, + 43, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=0, region='-'), + ) invariant( - crossmap.coordinate_to_coding, 42, - crossmap.coding_to_coordinate, (1, 0, 0, 0)) + crossmap.coordinate_to_coding, + 42, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=0, region=''), + ) # Boundary between CDS and 3'. 
invariant( - crossmap.coordinate_to_coding, 32, - crossmap.coding_to_coordinate, (6, 0, 0, 0)) + crossmap.coordinate_to_coding, + 32, + crossmap.coding_to_coordinate, + CodingPoint(position=6, offset=0, region=''), + ) invariant( - crossmap.coordinate_to_coding, 31, - crossmap.coding_to_coordinate, (1, 0, 1, 0)) + crossmap.coordinate_to_coding, + 31, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=0, region='*'), + ) def test_Coding_regions(): @@ -136,19 +222,31 @@ def test_Coding_regions(): # Upstream odd length intron between two regions. invariant( - crossmap.coordinate_to_coding, 25, - crossmap.coding_to_coordinate, (-1, 5, -1, 0)) + crossmap.coordinate_to_coding, + 25, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=5, region='-'), + ) invariant( - crossmap.coordinate_to_coding, 26, - crossmap.coding_to_coordinate, (1, -4, 0, 0)) + crossmap.coordinate_to_coding, + 26, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=-4, region=''), + ) # Downstream odd length intron between two regions. invariant( - crossmap.coordinate_to_coding, 44, - crossmap.coding_to_coordinate, (10, 5, 0, 0)) + crossmap.coordinate_to_coding, + 44, + crossmap.coding_to_coordinate, + CodingPoint(position=10, offset=5, region=''), + ) invariant( - crossmap.coordinate_to_coding, 45, - crossmap.coding_to_coordinate, (1, -4, 1, 0)) + crossmap.coordinate_to_coding, + 45, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=-4, region='*'), + ) def test_Coding_regions_inverted(): @@ -157,19 +255,31 @@ def test_Coding_regions_inverted(): # Upstream odd length intron between two regions. 
invariant( - crossmap.coordinate_to_coding, 44, - crossmap.coding_to_coordinate, (-1, 5, -1, 0)) + crossmap.coordinate_to_coding, + 44, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=5, region='-'), + ) invariant( - crossmap.coordinate_to_coding, 43, - crossmap.coding_to_coordinate, (1, -4, 0, 0)) + crossmap.coordinate_to_coding, + 43, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=-4, region=''), + ) # Downstream odd length intron between two regions. invariant( - crossmap.coordinate_to_coding, 25, - crossmap.coding_to_coordinate, (10, 5, 0, 0)) + crossmap.coordinate_to_coding, + 25, + crossmap.coding_to_coordinate, + CodingPoint(position=10, offset=5, region=''), + ) invariant( - crossmap.coordinate_to_coding, 24, - crossmap.coding_to_coordinate, (1, -4, 1, 0)) + crossmap.coordinate_to_coding, + 24, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=-4, region='*'), + ) def test_Coding_no_utr5(): @@ -178,11 +288,61 @@ def test_Coding_no_utr5(): # Direct transition from upstream to CDS. 
invariant( - crossmap.coordinate_to_coding, 9, - crossmap.coding_to_coordinate, (1, -1, 0, -1)) + crossmap.coordinate_to_coding, + 9, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=-1, region='u'), + ) + invariant( + crossmap.coordinate_to_coding, + 10, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=0, region=''), + ) + + +def test_Coding_no_intron(): + crossmap = Coding([(10, 20), (20, 30)], (15, 25)) + + invariant( + crossmap.coordinate_to_coding, + 20, + crossmap.coding_to_coordinate, + CodingPoint(position=6, offset=0, region=''), + ) + + +def test_Coding_no_intron_inverted(): + crossmap = Coding([(10, 20), (20, 30)], (15, 25), True) + + invariant( + crossmap.coordinate_to_coding, + 20, + crossmap.coding_to_coordinate, + CodingPoint(position=5, offset=0, region=''), + ) + + +def test_Coding_one_base_intron(): + crossmap = Coding([(10, 19), (20, 30)], (15, 25)) + + invariant( + crossmap.coordinate_to_coding, + 19, + crossmap.coding_to_coordinate, + CodingPoint(position=4, offset=1, region=''), + ) + + +def test_Coding_one_base_intron_inverted(): + crossmap = Coding([(10, 19), (20, 30)], (15, 25), True) + invariant( - crossmap.coordinate_to_coding, 10, - crossmap.coding_to_coordinate, (1, 0, 0, 0)) + crossmap.coordinate_to_coding, + 19, + crossmap.coding_to_coordinate, + CodingPoint(position=5, offset=1, region=''), + ) def test_Coding_no_utr5_inverted(): @@ -191,11 +351,17 @@ def test_Coding_no_utr5_inverted(): # Direct transition from upstream to CDS. 
invariant( - crossmap.coordinate_to_coding, 20, - crossmap.coding_to_coordinate, (1, -1, 0, -1)) + crossmap.coordinate_to_coding, + 20, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=-1, region='u'), + ) invariant( - crossmap.coordinate_to_coding, 19, - crossmap.coding_to_coordinate, (1, 0, 0, 0)) + crossmap.coordinate_to_coding, + 19, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=0, region=''), + ) def test_Coding_no_utr3(): @@ -204,11 +370,17 @@ def test_Coding_no_utr3(): # Direct transition from CDS to downstream. invariant( - crossmap.coordinate_to_coding, 19, - crossmap.coding_to_coordinate, (5, 0, 0, 0)) + crossmap.coordinate_to_coding, + 19, + crossmap.coding_to_coordinate, + CodingPoint(position=5, offset=0, region=''), + ) invariant( - crossmap.coordinate_to_coding, 20, - crossmap.coding_to_coordinate, (5, 1, 0, 1)) + crossmap.coordinate_to_coding, + 20, + crossmap.coding_to_coordinate, + CodingPoint(position=5, offset=1, region='d'), + ) def test_Coding_no_utr3_inverted(): @@ -217,87 +389,149 @@ def test_Coding_no_utr3_inverted(): # Direct transition from CDS to downstream. invariant( - crossmap.coordinate_to_coding, 10, - crossmap.coding_to_coordinate, (5, 0, 0, 0)) + crossmap.coordinate_to_coding, + 10, + crossmap.coding_to_coordinate, + CodingPoint(position=5, offset=0, region=''), + ) invariant( - crossmap.coordinate_to_coding, 9, - crossmap.coding_to_coordinate, (5, 1, 0, 1)) + crossmap.coordinate_to_coding, + 9, + crossmap.coding_to_coordinate, + CodingPoint(position=5, offset=1, region='d'), + ) def test_Coding_small_utr5(): - """A 5' UTR may be of lenght one.""" + """A 5' UTR may be of length one.""" crossmap = Coding([(10, 20)], (11, 15)) # Transition from upstream to 5' UTR to CDS. 
invariant( - crossmap.coordinate_to_coding, 9, - crossmap.coding_to_coordinate, (-1, -1, -1, -1)) + crossmap.coordinate_to_coding, + 9, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=-1, region='u'), + ) invariant( - crossmap.coordinate_to_coding, 10, - crossmap.coding_to_coordinate, (-1, 0, -1, 0)) + crossmap.coordinate_to_coding, + 10, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=0, region='-'), + ) invariant( - crossmap.coordinate_to_coding, 11, - crossmap.coding_to_coordinate, (1, 0, 0, 0)) + crossmap.coordinate_to_coding, + 11, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=0, region=''), + ) def test_Coding_small_utr5_inverted(): - """A 5' UTR may be of lenght one.""" + """A 5' UTR may be of length one.""" crossmap = Coding([(10, 20)], (15, 19), True) # Transition from upstream to 5' UTR to CDS. invariant( - crossmap.coordinate_to_coding, 20, - crossmap.coding_to_coordinate, (-1, -1, -1, -1)) + crossmap.coordinate_to_coding, + 20, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=-1, region='u'), + ) invariant( - crossmap.coordinate_to_coding, 19, - crossmap.coding_to_coordinate, (-1, 0, -1, 0)) + crossmap.coordinate_to_coding, + 19, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=0, region='-'), + ) invariant( - crossmap.coordinate_to_coding, 18, - crossmap.coding_to_coordinate, (1, 0, 0, 0)) + crossmap.coordinate_to_coding, + 18, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=0, region=''), + ) def test_Coding_small_utr3(): - """A 5' UTR may be of lenght one.""" + """A 3' UTR may be of length one.""" crossmap = Coding([(10, 20)], (15, 19)) # Transition from CDS to 3' UTR to downstream. 
invariant( - crossmap.coordinate_to_coding, 18, - crossmap.coding_to_coordinate, (4, 0, 0, 0)) + crossmap.coordinate_to_coding, + 18, + crossmap.coding_to_coordinate, + CodingPoint(position=4, offset=0, region=''), + ) invariant( - crossmap.coordinate_to_coding, 19, - crossmap.coding_to_coordinate, (1, 0, 1, 0)) + crossmap.coordinate_to_coding, + 19, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=0, region='*'), + ) invariant( - crossmap.coordinate_to_coding, 20, - crossmap.coding_to_coordinate, (1, 1, 1, 1)) + crossmap.coordinate_to_coding, + 20, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=1, region='d'), + ) def test_Coding_small_utr3_inverted(): - """A 5' UTR may be of lenght one.""" + """A 3' UTR may be of length one.""" crossmap = Coding([(10, 20)], (11, 15), True) # Transition from CDS to 3' UTR to downstream. invariant( - crossmap.coordinate_to_coding, 11, - crossmap.coding_to_coordinate, (4, 0, 0, 0)) + crossmap.coordinate_to_coding, + 11, + crossmap.coding_to_coordinate, + CodingPoint(position=4, offset=0, region=''), + ) invariant( - crossmap.coordinate_to_coding, 10, - crossmap.coding_to_coordinate, (1, 0, 1, 0)) + crossmap.coordinate_to_coding, + 10, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=0, region='*'), + ) invariant( - crossmap.coordinate_to_coding, 9, - crossmap.coding_to_coordinate, (1, 1, 1, 1)) + crossmap.coordinate_to_coding, + 9, + crossmap.coding_to_coordinate, + CodingPoint(position=1, offset=1, region='d'), + ) def test_Coding_degenerate(): """Degenerate upstream and downstream positions are silently corrected.""" crossmap = Coding([(10, 20)], (11, 19)) + # Degenerate position in upstream. 
degenerate_equal( - crossmap.coding_to_coordinate, 9, - [(-1, -1, -1, -1), (-2, 0, -1, -1), (1, -2, 0, -1), (1, -10, 1, -1)]) + crossmap.coding_to_coordinate, + 9, + [ + CodingPoint(position=1, offset=-1, region='u'), + CodingPoint(position=2, offset=0, region='-'), + CodingPoint(position=1, offset=-2, region=''), + CodingPoint(position=1, offset=-10, region='*'), + CodingPoint(position=2, offset=-11, region='*'), + CodingPoint(position=3, offset=1, region='-'), + CodingPoint(position=4, offset=2, region='-'), + ], + ) degenerate_equal( - crossmap.coding_to_coordinate, 20, - [(1, 1, 1, 1), (2, 0, 1, 1), (8, 2, 0, 1), (-1, 10, -1, 1)]) + crossmap.coding_to_coordinate, + 20, + [ + CodingPoint(position=9, offset=1, region='d'), + CodingPoint(position=2, offset=0, region='*'), + CodingPoint(position=8, offset=2, region=''), + CodingPoint(position=1, offset=10, region='-'), + CodingPoint(position=2, offset=11, region='-'), + CodingPoint(position=7, offset=3, region=''), + ], + ) def test_Coding_inverted_degenerate(): @@ -305,43 +539,145 @@ def test_Coding_inverted_degenerate(): crossmap = Coding([(10, 20)], (11, 19), True) degenerate_equal( - crossmap.coding_to_coordinate, 20, - [(-1, -1, -1, -1), (-2, 0, -1, -1), (1, -2, 0, -1), (1, -10, 1, -1)]) + crossmap.coding_to_coordinate, + 20, + [ + CodingPoint(position=1, offset=-1, region='u'), + CodingPoint(position=2, offset=0, region='-'), + CodingPoint(position=1, offset=-2, region=''), + CodingPoint(position=1, offset=-10, region='*'), + CodingPoint(position=1, offset=-10, region='d'), + CodingPoint(position=2, offset=-3, region=''), + ], + ) degenerate_equal( - crossmap.coding_to_coordinate, 9, - [(1, 1, 1, 1), (2, 0, 1, 1), (8, 2, 0, 1), (-1, 10, -1, 1)]) + crossmap.coding_to_coordinate, + 9, + [ + CodingPoint(position=2, offset=1, region='d'), + CodingPoint(position=2, offset=0, region='*'), + CodingPoint(position=8, offset=2, region=''), + CodingPoint(position=1, offset=10, region='-'), + CodingPoint(position=1, 
offset=10, region='u'), + ], + ) def test_Coding_degenerate_return(): """Degenerate upstream and downstream positions may be returned.""" crossmap = Coding([(10, 20)], (11, 19)) - assert crossmap.coordinate_to_coding(9, True) == (-2, 0, -1, -1) - assert crossmap.coordinate_to_coding(20, True) == (2, 0, 1, 1) + assert crossmap.coordinate_to_coding(9, True) == CodingPoint(position=2, offset=0, region='-') + assert crossmap.coordinate_to_coding(20, True) == CodingPoint(position=2, offset=0, region='*') def test_Coding_inverted_degenerate_return(): """Degenerate upstream and downstream positions may be returned.""" crossmap = Coding([(10, 20)], (11, 19), True) - assert crossmap.coordinate_to_coding(20, True) == (-2, 0, -1, -1) - assert crossmap.coordinate_to_coding(9, True) == (2, 0, 1, 1) + assert crossmap.coordinate_to_coding(20, True) == CodingPoint(position=2, offset=0, region='-') + assert crossmap.coordinate_to_coding(25, True) == CodingPoint(position=7, offset=0, region='-') + assert crossmap.coordinate_to_coding(9, True) == CodingPoint(position=2, offset=0, region='*') + + +def test_Coding_no_utr5_degenerate_return(): + """A 5' UTR may be missing.""" + crossmap = Coding([(10, 20)], (10, 15)) + + assert crossmap.coordinate_to_coding(9, True) == CodingPoint(position=1, offset=0, region='-') + assert crossmap.coordinate_to_coding(10, True) == CodingPoint(position=1, offset=0, region='') + assert crossmap.coordinate_to_coding(19, True) == CodingPoint(position=5, offset=0, region='*') + assert crossmap.coordinate_to_coding(20, True) == CodingPoint(position=6, offset=0, region='*') + + +def test_Coding_no_utr5_inverted_degenerate_return(): + """A 5' UTR may be missing.""" + crossmap = Coding([(10, 20)], (10, 15), True) + + assert crossmap.coordinate_to_coding(9, True) == CodingPoint(position=1, offset=0, region='*') + assert crossmap.coordinate_to_coding(10, True) == CodingPoint(position=5, offset=0, region='') + assert crossmap.coordinate_to_coding(19, True) == 
CodingPoint(position=5, offset=0, region='-') + assert crossmap.coordinate_to_coding(20, True) == CodingPoint(position=6, offset=0, region='-') + + +def test_Coding_no_utr3_degenerate_return(): + """A 3' UTR may be missing.""" + crossmap = Coding([(10, 20)], (15, 20)) + + assert crossmap.coordinate_to_coding(9, True) == CodingPoint(position=6, offset=0, region='-') + assert crossmap.coordinate_to_coding(10, True) == CodingPoint(position=5, offset=0, region='-') + assert crossmap.coordinate_to_coding(19, True) == CodingPoint(position=5, offset=0, region='') + assert crossmap.coordinate_to_coding(20, True) == CodingPoint(position=1, offset=0, region='*') + + +def test_Coding_no_utr3_inverted_degenerate_return(): + """A 3' UTR may be missing.""" + crossmap = Coding([(10, 20)], (15, 20), True) + + assert crossmap.coordinate_to_coding(9, True) == CodingPoint(position=6, offset=0, region='*') + assert crossmap.coordinate_to_coding(10, True) == CodingPoint(position=5, offset=0, region='*') + assert crossmap.coordinate_to_coding(19, True) == CodingPoint(position=1, offset=0, region='') + assert crossmap.coordinate_to_coding(20, True) == CodingPoint(position=1, offset=0, region='-') + + +def test_Coding_small_utr5_degenerate_return(): + """A 5' UTR may be of length one.""" + crossmap = Coding([(10, 20)], (11, 15)) + + assert crossmap.coordinate_to_coding(9, True) == CodingPoint(position=2, offset=0, region='-') + assert crossmap.coordinate_to_coding(10, True) == CodingPoint(position=1, offset=0, region='-') + assert crossmap.coordinate_to_coding(11, True) == CodingPoint(position=1, offset=0, region='') + + +def test_Coding_small_utr5_inverted_degenerate_return(): + """A 5' UTR may be of length one.""" + crossmap = Coding([(10, 20)], (11, 15), True) + + assert crossmap.coordinate_to_coding(9, True) == CodingPoint(position=2, offset=0, region='*') + assert crossmap.coordinate_to_coding(10, True) == CodingPoint(position=1, offset=0, region='*') + assert 
crossmap.coordinate_to_coding(11, True) == CodingPoint(position=4, offset=0, region='') + + +def test_Coding_small_utr3_degenerate_return(): + """A 3' UTR may be of length one.""" + crossmap = Coding([(10, 20)], (15, 19)) + + assert crossmap.coordinate_to_coding(18, True) == CodingPoint(position=4, offset=0, region='') + assert crossmap.coordinate_to_coding(19, True) == CodingPoint(position=1, offset=0, region='*') + assert crossmap.coordinate_to_coding(20, True) == CodingPoint(position=2, offset=0, region='*') + + +def test_Coding_small_utr3_inverted_degenerate_return(): + """A 3' UTR may be of length one.""" + crossmap = Coding([(10, 20)], (15, 19), True) + + assert crossmap.coordinate_to_coding(18, True) == CodingPoint(position=1, offset=0, region='') + assert crossmap.coordinate_to_coding(19, True) == CodingPoint(position=1, offset=0, region='-') + assert crossmap.coordinate_to_coding(20, True) == CodingPoint(position=2, offset=0, region='-') + + +def test_Coding_two_exons_inverted_degenerate_return(): + """Degenerate upstream and downstream positions may be returned.""" + crossmap = Coding([(10, 20), (30, 40)], (18, 37), True) + + assert crossmap.coordinate_to_coding(5, True) == CodingPoint(position=13, offset=0, region='*') + assert crossmap.coordinate_to_coding(25, True) == CodingPoint(position=7, offset=5, region='') + assert crossmap.coordinate_to_coding(35, True) == CodingPoint(position=2, offset=0, region='') + assert crossmap.coordinate_to_coding(38, True) == CodingPoint(position=2, offset=0, region='-') def test_Coding_degenerate_no_return(): """Degenerate internal positions do not exist.""" crossmap = Coding([(10, 20), (30, 40)], (10, 40)) - assert (crossmap.coordinate_to_coding(25) == - crossmap.coordinate_to_coding(25, True)) + assert crossmap.coordinate_to_coding(25) == crossmap.coordinate_to_coding(25, True) def test_Coding_inverted_degenerate_no_return(): """Degenerate internal positions do not exist.""" crossmap = Coding([(10, 20), (30, 40)], 
(10, 40), True) - assert (crossmap.coordinate_to_coding(25) == - crossmap.coordinate_to_coding(25, True)) + assert crossmap.coordinate_to_coding(25) == crossmap.coordinate_to_coding(25, True) def test_Coding_no_utr_degenerate(): @@ -349,11 +685,26 @@ def test_Coding_no_utr_degenerate(): crossmap = Coding([(10, 11)], (10, 11)) degenerate_equal( - crossmap.coding_to_coordinate, 9, - [(1, -1, 0, -1), (-1, 0, -1, -1), (1, -2, 1, -1)]) + crossmap.coding_to_coordinate, + 9, + [ + CodingPoint(position=1, offset=-1, region='u'), + # CodingPoint(position=1, offset=0, region='-'), + CodingPoint(position=1, offset=-2, region='*'), + CodingPoint(position=1, offset=-1, region=''), + CodingPoint(position=1, offset=-1, region='d'), + ], + ) degenerate_equal( - crossmap.coding_to_coordinate, 11, - [(1, 1, 0, 1), (1, 0, 1, 1), (-1, 2, -1, 1)]) + crossmap.coding_to_coordinate, + 11, + [ + CodingPoint(position=1, offset=1, region='d'), + CodingPoint(position=1, offset=0, region='*'), + # CodingPoint(position=1, offset=2, region='-'), + CodingPoint(position=1, offset=1, region=''), + ], + ) def test_Coding_inverted_no_utr_degenerate(): @@ -361,55 +712,192 @@ def test_Coding_inverted_no_utr_degenerate(): crossmap = Coding([(10, 11)], (10, 11), True) degenerate_equal( - crossmap.coding_to_coordinate, 11, - [(1, -1, 0, -1), (-1, 0, -1, -1), (1, -2, 1, -1)]) + crossmap.coding_to_coordinate, + 11, + [ + CodingPoint(position=1, offset=-1, region='u'), + # CodingPoint(position=1, offset=0, region='-'), + CodingPoint(position=1, offset=-2, region='*'), + CodingPoint(position=1, offset=-1, region=''), + CodingPoint(position=1, offset=-1, region='d'), + ], + ) degenerate_equal( - crossmap.coding_to_coordinate, 9, - [(1, 1, 0, 1), (1, 0, 1, 1), (-1, 2, -1, 1)]) + crossmap.coding_to_coordinate, + 9, + [ + CodingPoint(position=1, offset=1, region='d'), + CodingPoint(position=1, offset=0, region='*'), + # CodingPoint(position=1, offset=2, region='-'), + CodingPoint(position=1, offset=1, region=''), 
+ CodingPoint(position=1, offset=1, region='u'), + ], + ) def test_Coding_no_utr_degenerate_return(): """UTRs may be missing.""" crossmap = Coding([(10, 11)], (10, 11)) - assert crossmap.coordinate_to_coding(8, True) == (-2, 0, -1, -2) - assert crossmap.coordinate_to_coding(9, True) == (-1, 0, -1, -1) - assert crossmap.coordinate_to_coding(11, True) == (1, 0, 1, 1) - assert crossmap.coordinate_to_coding(12, True) == (2, 0, 1, 2) + assert crossmap.coordinate_to_coding(8, True) == CodingPoint(position=2, offset=0, region='-') + assert crossmap.coordinate_to_coding(9, True) == CodingPoint(position=1, offset=0, region='-') + assert crossmap.coordinate_to_coding(11, True) == CodingPoint(position=1, offset=0, region='*') + assert crossmap.coordinate_to_coding(12, True) == CodingPoint(position=2, offset=0, region='*') def test_Coding_inverted_no_utr_degenerate_return(): """UTRs may be missing.""" crossmap = Coding([(10, 11)], (10, 11), True) - assert crossmap.coordinate_to_coding(11, True) == (-1, 0, -1, -1) - assert crossmap.coordinate_to_coding(9, True) == (1, 0, 1, 1) + assert crossmap.coordinate_to_coding(11, True) == CodingPoint(position=1, offset=0, region='-') + assert crossmap.coordinate_to_coding(9, True) == CodingPoint(position=1, offset=0, region='*') def test_Coding_protein(): """Protein positions.""" crossmap = Coding(_exons, _cds) - # Boundary between 5' UTR and CDS. 
+ # Boundary between upstream and 5' UTR + invariant( + crossmap.coordinate_to_protein, + 4, + crossmap.protein_to_coordinate, + ProteinPoint(position=4, offset=-1, region='u', position_in_codon=2) + ) + invariant( + crossmap.coordinate_to_protein, + 5, + crossmap.protein_to_coordinate, + ProteinPoint(position=4, offset=0, region='-', position_in_codon=2) + ) + + # Boundary between 5' UTR and CDS invariant( - crossmap.coordinate_to_protein, 31, - crossmap.protein_to_coordinate, (-1, 3, 0, -1, 0)) + crossmap.coordinate_to_protein, + 31, + crossmap.protein_to_coordinate, + ProteinPoint(position=1, offset=0, region='-', position_in_codon=3), + ) invariant( - crossmap.coordinate_to_protein, 32, - crossmap.protein_to_coordinate, (1, 1, 0, 0, 0)) + crossmap.coordinate_to_protein, + 32, + crossmap.protein_to_coordinate, + ProteinPoint(position=1, offset=0, region='', position_in_codon=1), + ) # Intron boundary. invariant( - crossmap.coordinate_to_protein, 34, - crossmap.protein_to_coordinate, (1, 3, 0, 0, 0)) + crossmap.coordinate_to_protein, + 34, + crossmap.protein_to_coordinate, + ProteinPoint(position=1, offset=0, region='', position_in_codon=3), + ) invariant( - crossmap.coordinate_to_protein, 35, - crossmap.protein_to_coordinate, (1, 3, 1, 0, 0)) + crossmap.coordinate_to_protein, + 35, + crossmap.protein_to_coordinate, + ProteinPoint(position=1, offset=1, region='', position_in_codon=3), + ) # Boundary between CDS and 3' UTR. 
invariant( - crossmap.coordinate_to_protein, 42, - crossmap.protein_to_coordinate, (2, 3, 0, 0, 0)) + crossmap.coordinate_to_protein, + 42, + crossmap.protein_to_coordinate, + ProteinPoint(position=2, offset=0, region='', position_in_codon=3), + ) + invariant( + crossmap.coordinate_to_protein, + 43, + crossmap.protein_to_coordinate, + ProteinPoint(position=1, offset=0, region='*', position_in_codon=1), + ) + + # Boundary between 3' UTR and downstream + invariant( + crossmap.coordinate_to_protein, + 71, + crossmap.protein_to_coordinate, + ProteinPoint(position=2, offset=0, region='*', position_in_codon=2) + ) + invariant( + crossmap.coordinate_to_protein, + 72, + crossmap.protein_to_coordinate, + ProteinPoint(position=2, offset=1, region='d', position_in_codon=2) + ) + + +def test_Coding_inverted_protein(): + """Protein positions.""" + crossmap = Coding(_exons, _cds, True) + + # Boundary between upstream and 5' UTR + invariant( + crossmap.coordinate_to_protein, + 4, + crossmap.protein_to_coordinate, + ProteinPoint(position=4, offset=1, region='d', position_in_codon=2) + ) + invariant( + crossmap.coordinate_to_protein, + 5, + crossmap.protein_to_coordinate, + ProteinPoint(position=4, offset=0, region='*', position_in_codon=2) + ) + + # Boundary between 5' UTR and CDS + invariant( + crossmap.coordinate_to_protein, + 31, + crossmap.protein_to_coordinate, + ProteinPoint(position=1, offset=0, region='*', position_in_codon=1), + ) + invariant( + crossmap.coordinate_to_protein, + 32, + crossmap.protein_to_coordinate, + ProteinPoint(position=2, offset=0, region='', position_in_codon=3), + ) + + # Intron boundary. + invariant( + crossmap.coordinate_to_protein, + 34, + crossmap.protein_to_coordinate, + ProteinPoint(position=2, offset=0, region='', position_in_codon=1), + ) + invariant( + crossmap.coordinate_to_protein, + 35, + crossmap.protein_to_coordinate, + ProteinPoint(position=2, offset=-1, region='', position_in_codon=1), + ) + + # Boundary between CDS and 3' UTR. 
invariant( - crossmap.coordinate_to_protein, 43, - crossmap.protein_to_coordinate, (1, 1, 0, 1, 0)) + crossmap.coordinate_to_protein, + 42, + crossmap.protein_to_coordinate, + ProteinPoint(position=1, offset=0, region='', position_in_codon=1), + ) + invariant( + crossmap.coordinate_to_protein, + 43, + crossmap.protein_to_coordinate, + ProteinPoint(position=1, offset=0, region='-', position_in_codon=3), + ) + + # Boundary between 3' UTR and downstream + invariant( + crossmap.coordinate_to_protein, + 71, + crossmap.protein_to_coordinate, + ProteinPoint(position=2, offset=0, region='-', position_in_codon=2) + ) + invariant( + crossmap.coordinate_to_protein, + 72, + crossmap.protein_to_coordinate, + ProteinPoint(position=2, offset=-1, region='u', position_in_codon=2) + ) diff --git a/tests/test_locus.py b/tests/test_locus.py index a873416..2379125 100644 --- a/tests/test_locus.py +++ b/tests/test_locus.py @@ -1,4 +1,5 @@ from mutalyzer_crossmapper import Locus +from mutalyzer_crossmapper.models import Point from helper import degenerate_equal, invariant @@ -7,37 +8,37 @@ def test_Locus(): """Forward orientent Lovus.""" locus = Locus((30, 35)) - invariant(locus.to_position, 29, locus.to_coordinate, (0, -1)) - invariant(locus.to_position, 30, locus.to_coordinate, (0, 0)) - invariant(locus.to_position, 31, locus.to_coordinate, (1, 0)) - invariant(locus.to_position, 33, locus.to_coordinate, (3, 0)) - invariant(locus.to_position, 34, locus.to_coordinate, (4, 0)) - invariant(locus.to_position, 35, locus.to_coordinate, (4, 1)) + invariant(locus.to_position, 29, locus.to_coordinate, Point(position=0, offset=-1)) + invariant(locus.to_position, 30, locus.to_coordinate, Point(position=0, offset=0)) + invariant(locus.to_position, 31, locus.to_coordinate, Point(position=1, offset=0)) + invariant(locus.to_position, 33, locus.to_coordinate, Point(position=3, offset=0)) + invariant(locus.to_position, 34, locus.to_coordinate, Point(position=4, offset=0)) + invariant(locus.to_position, 
35, locus.to_coordinate, Point(position=4, offset=1)) def test_Locus_inverted(): """Reverse orientent Lovus.""" locus = Locus((30, 35), True) - invariant(locus.to_position, 35, locus.to_coordinate, (0, -1)) - invariant(locus.to_position, 34, locus.to_coordinate, (0, 0)) - invariant(locus.to_position, 33, locus.to_coordinate, (1, 0)) - invariant(locus.to_position, 31, locus.to_coordinate, (3, 0)) - invariant(locus.to_position, 30, locus.to_coordinate, (4, 0)) - invariant(locus.to_position, 29, locus.to_coordinate, (4, 1)) + invariant(locus.to_position, 35, locus.to_coordinate, Point(position=0, offset=-1)) + invariant(locus.to_position, 34, locus.to_coordinate, Point(position=0, offset=0)) + invariant(locus.to_position, 33, locus.to_coordinate, Point(position=1, offset=0)) + invariant(locus.to_position, 31, locus.to_coordinate, Point(position=3, offset=0)) + invariant(locus.to_position, 30, locus.to_coordinate, Point(position=4, offset=0)) + invariant(locus.to_position, 29, locus.to_coordinate, Point(position=4, offset=1)) def test_Locus_degenerate(): """Degenerate positions are silently corrected.""" locus = Locus((10, 20)) - degenerate_equal(locus.to_coordinate, 9, [(0, -1), (-1, 0)]) - degenerate_equal(locus.to_coordinate, 20, [(9, 1), (10, 0)]) + degenerate_equal(locus.to_coordinate, 9, [Point(position=0, offset=-1), Point(position=-1, offset=0)]) + degenerate_equal(locus.to_coordinate, 20, [Point(position=9, offset=1), Point(position=10, offset=0)]) def test_Locus_inverted_degenerate(): """Degenerate positions are silently corrected.""" locus = Locus((10, 20), True) - degenerate_equal(locus.to_coordinate, 20, [(0, -1), (-1, 0)]) - degenerate_equal(locus.to_coordinate, 9, [(9, 1), (10, 0)]) + degenerate_equal(locus.to_coordinate, 20, [Point(position=0, offset=-1), Point(position=-1, offset=0)]) + degenerate_equal(locus.to_coordinate, 9, [Point(position=9, offset=1), Point(position=10, offset=0)]) diff --git a/tests/test_multi_locus.py b/tests/test_multi_locus.py 
index 6ce0013..ce02704 100644 --- a/tests/test_multi_locus.py +++ b/tests/test_multi_locus.py @@ -1,5 +1,6 @@ from mutalyzer_crossmapper import MultiLocus from mutalyzer_crossmapper.multi_locus import _offsets +from mutalyzer_crossmapper.models import Point from helper import degenerate_equal, invariant @@ -32,29 +33,70 @@ def test_MultiLocus(): # Boundary between upstream and the first locus. invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, (0, -1, -1)) + multi_locus.to_position, + 4, + multi_locus.to_coordinate, + Point(position=0, offset=-1, region='u'), + ) + invariant( - multi_locus.to_position, 5, multi_locus.to_coordinate, (0, 0, 0)) + multi_locus.to_position, + 5, + multi_locus.to_coordinate, + Point(position=0, offset=0, region=''), + ) # Internal locus. invariant( - multi_locus.to_position, 29, multi_locus.to_coordinate, (9, -1, 0)) - invariant( - multi_locus.to_position, 30, multi_locus.to_coordinate, (9, 0, 0)) - invariant( - multi_locus.to_position, 31, multi_locus.to_coordinate, (10, 0, 0)) - invariant( - multi_locus.to_position, 33, multi_locus.to_coordinate, (12, 0, 0)) - invariant( - multi_locus.to_position, 34, multi_locus.to_coordinate, (13, 0, 0)) - invariant( - multi_locus.to_position, 35, multi_locus.to_coordinate, (13, 1, 0)) + multi_locus.to_position, + 29, + multi_locus.to_coordinate, + Point(position=9, offset=-1, region=''), + ) + invariant( + multi_locus.to_position, + 30, + multi_locus.to_coordinate, + Point(position=9, offset=0, region=''), + ) + invariant( + multi_locus.to_position, + 31, + multi_locus.to_coordinate, + Point(position=10, offset=0, region=''), + ) + invariant( + multi_locus.to_position, + 33, + multi_locus.to_coordinate, + Point(position=12, offset=0, region=''), + ) + invariant( + multi_locus.to_position, + 34, + multi_locus.to_coordinate, + Point(position=13, offset=0, region=''), + ) + invariant( + multi_locus.to_position, + 35, + multi_locus.to_coordinate, + Point(position=13, offset=1, 
region=''), + ) # Boundary between the last locus and downstream. invariant( - multi_locus.to_position, 71, multi_locus.to_coordinate, (21, 0, 0)) + multi_locus.to_position, + 71, + multi_locus.to_coordinate, + Point(position=21, offset=0, region=''), + ) invariant( - multi_locus.to_position, 72, multi_locus.to_coordinate, (21, 1, 1)) + multi_locus.to_position, + 72, + multi_locus.to_coordinate, + Point(position=21, offset=1, region='d'), + ) def test_MultiLocus_inverted(): @@ -63,29 +105,69 @@ def test_MultiLocus_inverted(): # Boundary between upstream and the first locus. invariant( - multi_locus.to_position, 72, multi_locus.to_coordinate, (0, -1, -1)) + multi_locus.to_position, + 72, + multi_locus.to_coordinate, + Point(position=0, offset=-1, region='u'), + ) invariant( - multi_locus.to_position, 71, multi_locus.to_coordinate, (0, 0, 0)) + multi_locus.to_position, + 71, + multi_locus.to_coordinate, + Point(position=0, offset=0, region=''), + ) # Internal locus. invariant( - multi_locus.to_position, 35, multi_locus.to_coordinate, (8, -1, 0)) - invariant( - multi_locus.to_position, 34, multi_locus.to_coordinate, (8, 0, 0)) - invariant( - multi_locus.to_position, 33, multi_locus.to_coordinate, (9, 0, 0)) - invariant( - multi_locus.to_position, 31, multi_locus.to_coordinate, (11, 0, 0)) - invariant( - multi_locus.to_position, 30, multi_locus.to_coordinate, (12, 0, 0)) - invariant( - multi_locus.to_position, 29, multi_locus.to_coordinate, (12, 1, 0)) + multi_locus.to_position, + 35, + multi_locus.to_coordinate, + Point(position=8, offset=-1, region=''), + ) + invariant( + multi_locus.to_position, + 34, + multi_locus.to_coordinate, + Point(position=8, offset=0, region=''), + ) + invariant( + multi_locus.to_position, + 33, + multi_locus.to_coordinate, + Point(position=9, offset=0, region=''), + ) + invariant( + multi_locus.to_position, + 31, + multi_locus.to_coordinate, + Point(position=11, offset=0, region=''), + ) + invariant( + multi_locus.to_position, + 30, + 
multi_locus.to_coordinate, + Point(position=12, offset=0, region=''), + ) + invariant( + multi_locus.to_position, + 29, + multi_locus.to_coordinate, + Point(position=12, offset=1, region=''), + ) # Boundary between the last locus and downstream. invariant( - multi_locus.to_position, 5, multi_locus.to_coordinate, (21, 0, 0)) + multi_locus.to_position, + 5, + multi_locus.to_coordinate, + Point(position=21, offset=0, region=''), + ) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, (21, 1, 1)) + multi_locus.to_position, + 4, + multi_locus.to_coordinate, + Point(position=21, offset=1, region='d'), + ) def test_MultiLocus_adjacent_loci(): @@ -93,9 +175,17 @@ def test_MultiLocus_adjacent_loci(): multi_locus = MultiLocus([(1, 3), (3, 5)]) invariant( - multi_locus.to_position, 2, multi_locus.to_coordinate, (1, 0, 0)) + multi_locus.to_position, + 2, + multi_locus.to_coordinate, + Point(position=1, offset=0, region=''), + ) invariant( - multi_locus.to_position, 3, multi_locus.to_coordinate, (2, 0, 0)) + multi_locus.to_position, + 3, + multi_locus.to_coordinate, + Point(position=2, offset=0, region=''), + ) def test_MultiLocus_adjacent_loci_inverted(): @@ -103,9 +193,17 @@ def test_MultiLocus_adjacent_loci_inverted(): multi_locus = MultiLocus([(1, 3), (3, 5)], True) invariant( - multi_locus.to_position, 3, multi_locus.to_coordinate, (1, 0, 0)) + multi_locus.to_position, + 3, + multi_locus.to_coordinate, + Point(position=1, offset=0, region=''), + ) invariant( - multi_locus.to_position, 2, multi_locus.to_coordinate, (2, 0, 0)) + multi_locus.to_position, + 2, + multi_locus.to_coordinate, + Point(position=2, offset=0, region=''), + ) def test_MultiLocus_offsets_odd(): @@ -113,9 +211,17 @@ def test_MultiLocus_offsets_odd(): multi_locus = MultiLocus([(1, 3), (6, 8)]) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, (1, 2, 0)) + multi_locus.to_position, + 4, + multi_locus.to_coordinate, + Point(position=1, offset=2, region=''), + ) invariant( - 
multi_locus.to_position, 5, multi_locus.to_coordinate, (2, -1, 0)) + multi_locus.to_position, + 5, + multi_locus.to_coordinate, + Point(position=2, offset=-1, region=''), + ) def test_MultiLocus_offsets_odd_inverted(): @@ -123,9 +229,17 @@ def test_MultiLocus_offsets_odd_inverted(): multi_locus = MultiLocus([(1, 3), (6, 8)], True) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, (1, 2, 0)) + multi_locus.to_position, + 4, + multi_locus.to_coordinate, + Point(position=1, offset=2, region=''), + ) invariant( - multi_locus.to_position, 3, multi_locus.to_coordinate, (2, -1, 0)) + multi_locus.to_position, + 3, + multi_locus.to_coordinate, + Point(position=2, offset=-1, region=''), + ) def test_MultiLocus_offsets_even(): @@ -133,9 +247,17 @@ def test_MultiLocus_offsets_even(): multi_locus = MultiLocus([(1, 3), (7, 9)]) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, (1, 2, 0)) + multi_locus.to_position, + 4, + multi_locus.to_coordinate, + Point(position=1, offset=2, region=''), + ) invariant( - multi_locus.to_position, 5, multi_locus.to_coordinate, (2, -2, 0)) + multi_locus.to_position, + 5, + multi_locus.to_coordinate, + Point(position=2, offset=-2, region=''), + ) def test_MultiLocus_offsets_even_inverted(): @@ -143,9 +265,17 @@ def test_MultiLocus_offsets_even_inverted(): multi_locus = MultiLocus([(1, 3), (7, 9)], True) invariant( - multi_locus.to_position, 5, multi_locus.to_coordinate, (1, 2, 0)) + multi_locus.to_position, + 5, + multi_locus.to_coordinate, + Point(position=1, offset=2, region=''), + ) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, (2, -2, 0)) + multi_locus.to_position, + 4, + multi_locus.to_coordinate, + Point(position=2, offset=-2, region=''), + ) def test_MultiLocus_degenerate(): @@ -153,9 +283,23 @@ def test_MultiLocus_degenerate(): multi_locus = MultiLocus(_locations) degenerate_equal( - multi_locus.to_coordinate, 4, [(0, -1, -1), (-1, 0, -1)]) + multi_locus.to_coordinate, + 4, + [ + 
Point(position=0, offset=-1, region='u'), + Point(position=-1, offset=0, region=''), + ], + ) + degenerate_equal( - multi_locus.to_coordinate, 72, [(21, 1, 1), (22, 0, 1)]) + multi_locus.to_coordinate, + 72, + [ + Point(position=21, offset=1, region='d'), + Point(position=22, offset=0, region=''), + Point(position=22, offset=1, region='d'), + ], + ) def test_MultiLocus_inverted_degenerate(): @@ -163,6 +307,21 @@ def test_MultiLocus_inverted_degenerate(): multi_locus = MultiLocus(_locations, True) degenerate_equal( - multi_locus.to_coordinate, 72, [(0, -1, -1), (-1, 0, -1)]) + multi_locus.to_coordinate, + 72, + [ + Point(position=-1, offset=0, region=''), + Point(position=0, offset=-1, region=''), + Point(position=0, offset=-1, region='u'), + ], + ) + degenerate_equal( - multi_locus.to_coordinate, 4, [(21, 1, 1), (22, 0, 1)]) + multi_locus.to_coordinate, + 4, + [ + Point(position=21, offset=1, region=''), + Point(position=22, offset=0, region=''), + Point(position=21, offset=1, region='d'), + ], + )