# Source code for ase.formula
import re
from functools import lru_cache
from math import gcd
from typing import Dict, List, Sequence, Tuple, Union
from ase.data import atomic_numbers, chemical_symbols
# Recursive type used for parsed formulas (A, A2, A+B):
#   str          -> a single symbol, e.g. 'A'
#   (Tree, int)  -> a sub-tree repeated an integer number of times, e.g. 'A2'
#   [Tree, ...]  -> a sequence of sub-trees
Tree = Union[str, Tuple['Tree', int], List['Tree']]
[docs]
class Formula:
    def __init__(self,
                 formula: Union[str, 'Formula'] = '',
                 *,
                 strict: bool = False,
                 format: str = '',
                 _tree: Tree = None,
                 _count: Dict[str, int] = None):
        """Chemical formula object.
        Parameters
        ----------
        formula: str
            Text string representation of formula.  Examples: ``'6CO2'``,
            ``'30Cu+2CO'``, ``'Pt(CO)6'``.
        strict: bool
            Only allow real chemical symbols.
        format: str
            Reorder according to *format*.  Must be one of hill, metal,
            ab2, a2b, periodic or reduce.
        Examples
        --------
        >>> from ase.formula import Formula
        >>> w = Formula('H2O')
        >>> w.count()
        {'H': 2, 'O': 1}
        >>> 'H' in w
        True
        >>> w == 'HOH'
        True
        >>> f'{w:latex}'
        'H$_{2}$O'
        >>> w.format('latex')
        'H$_{2}$O'
        >>> divmod(6 * w + 'Cu', w)
        (6, Formula('Cu'))
        Raises
        ------
        ValueError
            on malformed formula
        """
        # Be sure that Formula(x) works the same whether x is string or Formula
        assert isinstance(formula, (str, Formula))
        formula = str(formula)
        if format:
            assert _tree is None and _count is None
            if format not in {'hill', 'metal', 'abc', 'reduce', 'ab2', 'a2b',
                              'periodic'}:
                raise ValueError(f'Illegal format: {format}')
            formula = Formula(formula).format(format)
        self._formula = formula
        self._tree = _tree or parse(formula)
        self._count = _count or count_tree(self._tree)
        if strict:
            for symbol in self._count:
                if symbol not in atomic_numbers:
                    raise ValueError('Unknown chemical symbol: ' + symbol)
[docs]
    def convert(self, fmt: str) -> 'Formula':
        """Reformat this formula as a new Formula.
        Same formatting rules as Formula(format=...) keyword.
        """
        return Formula(self._formula, format=fmt)
[docs]
    def count(self) -> Dict[str, int]:
        """Return dictionary mapping chemical symbol to number of atoms.
        Example
        -------
        >>> Formula('H2O').count()
        {'H': 2, 'O': 1}
        """
        return self._count.copy()
[docs]
    def reduce(self) -> Tuple['Formula', int]:
        """Divide all atom counts by their greatest common divisor.

        Returns
        -------
        formula: Formula
            The reduced formula.
        n: int
            How many reduced formula units the original contains.

        Example
        -------
        >>> Formula('2H2O').reduce()
        (Formula('H2O'), 2)
        """
        reduced_counts, units = self._reduce()
        reduced = self.from_dict(reduced_counts)
        return reduced, units
[docs]
    def stoichiometry(self) -> Tuple['Formula', 'Formula', int]:
        """Reduce to unique stoichiometry using "chemical symbols" A, B, C, ...

        The reduced counts are ordered by count and then by symbol; the
        placeholder letters A, B, C, ... are handed out in that order.

        Returns
        -------
        Tuple of the placeholder formula, the reduced formula written in the
        same order, and the number of reduced formula units.

        Examples
        --------
        >>> Formula('CO2').stoichiometry()
        (Formula('AB2'), Formula('CO2'), 1)
        >>> Formula('(H2O)4').stoichiometry()
        (Formula('AB2'), Formula('OH2'), 4)
        """
        reduced, units = self._reduce()
        ranked = sorted(reduced.items(), key=lambda item: (item[1], item[0]))
        first_letter = ord('A')
        placeholders = {}
        ordered = {}
        for index, (symb, n) in enumerate(ranked):
            placeholders[chr(first_letter + index)] = n
            ordered[symb] = n
        return (self.from_dict(placeholders),
                self.from_dict(ordered),
                units)
[docs]
    def format(self, fmt: str = '') -> str:
        """Format formula as string.
        Formats:
        * ``'hill'``: alphabetically ordered with C and H first
        * ``'metal'``: alphabetically ordered with metals first
        * ``'ab2'``: count-ordered first then alphabetically ordered
        * ``'abc'``: old name for ``'ab2'``
        * ``'a2b'``: reverse count-ordered first then alphabetically ordered
        * ``'periodic'``: periodic-table ordered: period first then group
        * ``'reduce'``: Reduce and keep order (ABBBC -> AB3C)
        * ``'latex'``: LaTeX representation
        * ``'html'``: HTML representation
        * ``'rest'``: reStructuredText representation
        Example
        -------
        >>> Formula('H2O').format('html')
        'H<sub>2</sub>O'
        """
        return format(self, fmt)
[docs]
    def __format__(self, fmt: str) -> str:
        """Format Formula as str.
        Possible formats: ``'hill'``, ``'metal'``, ``'abc'``, ``'reduce'``,
        ``'latex'``, ``'html'``, ``'rest'``.
        Example
        -------
        >>> f = Formula('OH2')
        >>> '{f}, {f:hill}, {f:latex}'.format(f=f)
        'OH2, H2O, OH$_{2}$'
        """
        if fmt == 'hill':
            count = self.count()
            count2 = {symb: count.pop(symb) for symb in 'CH' if symb in count}
            for symb, n in sorted(count.items()):
                count2[symb] = n
            return dict2str(count2)
        if fmt == 'metal':
            count = self.count()
            result2 = [(s, count.pop(s)) for s in non_metals if s in count]
            result = [(s, count[s]) for s in sorted(count)]
            result += sorted(result2)
            return dict2str(dict(result))
        if fmt == 'abc' or fmt == 'ab2':
            _, f, N = self.stoichiometry()
            return dict2str({symb: n * N for symb, n in f._count.items()})
        if fmt == 'a2b':
            _, f, N = self.stoichiometry()
            return dict2str({symb: -n * N
                             for n, symb
                             in sorted([(-n, symb) for symb, n
                                        in f._count.items()])})
        if fmt == 'periodic':
            count = self.count()
            order = periodic_table_order()
            items = sorted(count.items(),
                           key=lambda item: order.get(item[0], 0))
            return ''.join(symb + (str(n) if n > 1 else '')
                           for symb, n in items)
        if fmt == 'reduce':
            symbols = list(self)
            nsymb = len(symbols)
            parts = []
            i1 = 0
            for i2, symbol in enumerate(symbols):
                if i2 == nsymb - 1 or symbol != symbols[i2 + 1]:
                    parts.append(symbol)
                    m = i2 + 1 - i1
                    if m > 1:
                        parts.append(str(m))
                    i1 = i2 + 1
            return ''.join(parts)
        if fmt == 'latex':
            return self._tostr('$_{', '}$')
        if fmt == 'html':
            return self._tostr('<sub>', '</sub>')
        if fmt == 'rest':
            return self._tostr(r'\ :sub:`', r'`\ ')
        if fmt == '':
            return self._formula
        raise ValueError('Invalid format specifier')
[docs]
    @staticmethod
    def from_dict(dct: Dict[str, int]) -> 'Formula':
        """Convert dict to Formula.
        >>> Formula.from_dict({'H': 2})
        Formula('H2')
        """
        dct2 = {}
        for symb, n in dct.items():
            if not (isinstance(symb, str) and isinstance(n, int) and n >= 0):
                raise ValueError(f'Bad dictionary: {dct}')
            if n > 0:  # filter out n=0 symbols
                dct2[symb] = n
        return Formula(dict2str(dct2),
                       _tree=[([(symb, n) for symb, n in dct2.items()], 1)],
                       _count=dct2)
[docs]
    @staticmethod
    def from_list(symbols: Sequence[str]) -> 'Formula':
        """Convert a sequence of chemical symbols to a Formula.

        Parameters
        ----------
        symbols: sequence of str
            Chemical symbols in the order they should appear.  The sequence
            is copied, so later changes to it do not affect the Formula.

        Returns
        -------
        Formula whose string is the concatenation of *symbols*.
        """
        if not isinstance(symbols, str):
            # The parse tree reserves tuples for (subtree, repeat) pairs, so
            # the symbols must be stored as a list; copying a tuple with
            # ``symbols[:]`` would be misread as such a pair.  A plain str is
            # passed through unchanged, as before.
            symbols = list(symbols)
        return Formula(''.join(symbols),
                       _tree=[(symbols, 1)])  # type: ignore[list-item]
[docs]
    def __getitem__(self, symb: str) -> int:
        """Number of atoms with chemical symbol *symb*."""
        return self._count.get(symb, 0)
[docs]
    def __contains__(self, f: Union[str, 'Formula']) -> bool:
        """Check if formula contains chemical symbols in *f*.
        Type of *f* must be str or Formula.
        Examples
        --------
        >>> 'OH' in Formula('H2O')
        True
        >>> 'O2' in Formula('H2O')
        False
        """
        if isinstance(f, str):
            f = Formula(f)
        for symb, n in f._count.items():
            if self[symb] < n:
                return False
        return True
[docs]
    def __eq__(self, other) -> bool:
        """Equality check.
        Note that order is not important.
        Example
        -------
        >>> Formula('CO') == Formula('OC')
        True
        """
        if isinstance(other, str):
            other = Formula(other)
        elif not isinstance(other, Formula):
            return False
        return self._count == other._count
[docs]
    def __add__(self, other: Union[str, 'Formula']) -> 'Formula':
        """Add two formulas.

        The result is built by joining the two formula strings with ``'+'``
        and parsing the combined text.  If either operand is the empty
        formula, the other operand alone is used, so that no dangling
        ``'+'`` ends up in the result: ``Formula('') + 'H2O'`` gives
        ``Formula('H2O')`` rather than ``Formula('+H2O')``.

        Parameters
        ----------
        other: str or Formula
            Formula to append on the right-hand side.

        Returns
        -------
        New Formula; neither operand is modified.
        """
        if not isinstance(other, str):
            other = other._formula
        if not self._formula:
            return Formula(other)
        if not other:
            return Formula(self._formula)
        return Formula(self._formula + '+' + other)
    def __radd__(self, other: str):  # -> Formula
        """Reflected addition: wrap *other* in a Formula and add self."""
        left = Formula(other)
        return left + self
[docs]
    def __mul__(self, N: int) -> 'Formula':
        """Repeat formula `N` times."""
        if N == 0:
            return Formula('')
        return self.from_dict({symb: n * N
                               for symb, n in self._count.items()})
    def __rmul__(self, N: int):  # -> Formula
        return self * N
[docs]
    def __divmod__(self,
                   other: Union['Formula', str]) -> Tuple[int, 'Formula']:
        """Return the tuple (self // other, self % other).
        Invariant::
            div, mod = divmod(self, other)
            div * other + mod == self
        Example
        -------
        >>> divmod(Formula('H2O'), 'H')
        (2, Formula('O'))
        """
        if isinstance(other, str):
            other = Formula(other)
        N = min(self[symb] // n for symb, n in other._count.items())
        dct = self.count()
        if N:
            for symb, n in other._count.items():
                dct[symb] -= n * N
                if dct[symb] == 0:
                    del dct[symb]
        return N, self.from_dict(dct)
    def __rdivmod__(self, other):
        """Reflected divmod: wrap *other* in a Formula and divide by self."""
        dividend = Formula(other)
        return divmod(dividend, self)
    def __mod__(self, other):
        return divmod(self, other)[1]
    def __rmod__(self, other):
        """Reflected modulo: wrap *other* in a Formula, then take ``% self``."""
        dividend = Formula(other)
        return dividend % self
    def __floordiv__(self, other):
        return divmod(self, other)[0]
    def __rfloordiv__(self, other):
        """Reflected floor division: wrap *other* in a Formula first."""
        dividend = Formula(other)
        return dividend // self
    def __iter__(self):
        return self._tree_iter()
    def _tree_iter(self, tree=None):
        if tree is None:
            tree = self._tree
        if isinstance(tree, str):
            yield tree
        elif isinstance(tree, tuple):
            tree, N = tree
            for _ in range(N):
                yield from self._tree_iter(tree)
        else:
            for tree in tree:
                yield from self._tree_iter(tree)
    def __str__(self):
        return self._formula
    def __repr__(self):
        return f'Formula({self._formula!r})'
    def _reduce(self):
        N = 0
        for n in self._count.values():
            if N == 0:
                N = n
            else:
                N = gcd(n, N)
        dct = {symb: n // N for symb, n in self._count.items()}
        return dct, N
    def _tostr(self, sub1, sub2):
        parts = []
        for tree, n in self._tree:
            s = tree2str(tree, sub1, sub2)
            if s[0] == '(' and s[-1] == ')':
                s = s[1:-1]
            if n > 1:
                s = str(n) + s
            parts.append(s)
        return '+'.join(parts)
def dict2str(dct: Dict[str, int]) -> str:
    """Concatenate the symbols of *dct*, appending counts larger than one.

    >>> dict2str({'A': 1, 'B': 2})
    'AB2'
    """
    pieces = []
    for symb, n in dct.items():
        suffix = str(n) if n > 1 else ''
        pieces.append(symb + suffix)
    return ''.join(pieces)
def parse(f: str) -> Tree:
    """Turn a formula string into its tree structure.

    The string is split at ``'+'``; every part becomes a
    ``(subtree, multiplier)`` pair, the multiplier being the leading number
    of the part (1 when absent).  An empty string gives an empty list.

    >>> parse('2A+BC2')
    [('A', 2), (['B', ('C', 2)], 1)]
    """
    if not f:
        return []
    terms = []
    for term in f.split('+'):
        multiplier, body = strip_number(term)
        terms.append((parse2(body), multiplier))
    return terms  # type: ignore[return-value]
def parse2(f: str) -> Tree:
    """Convert formula string to tree structure (no "+" symbols).

    A single unit is returned as is; several units are returned as a list;
    an empty string gives an empty list.

    >>> parse2('H2O')
    [('H', 2), 'O']
    >>> parse2('(H2O)3')
    ([('H', 2), 'O'], 3)

    Raises
    ------
    ValueError
        If a parenthesis is left open or the text does not start with a
        symbol (an upper-case letter optionally followed by one lower-case
        letter).
    """
    units = []
    while f:
        unit: Union[str, Tuple[str, int], Tree]
        if f[0] == '(':
            # Locate the ')' matching the opening parenthesis at position 0.
            level = 0
            for i, c in enumerate(f[1:], 1):
                if c == '(':
                    level += 1
                elif c == ')':
                    if level == 0:
                        break
                    level -= 1
            else:
                raise ValueError(
                    f'Unbalanced parentheses in formula: {f!r}')
            f2 = f[1:i]
            # A number after the group is its repeat count (default 1).
            n, f = strip_number(f[i + 1:])
            unit = (parse2(f2), n)
        else:
            m = re.match('([A-Z][a-z]?)([0-9]*)', f)
            if m is None:
                raise ValueError(
                    f'Expected a chemical symbol in formula at: {f!r}')
            symb = m.group(1)
            number = m.group(2)
            if number:
                unit = (symb, int(number))
            else:
                unit = symb
            f = f[m.end():]
        units.append(unit)
    if len(units) == 1:
        return units[0]
    return units
def strip_number(s: str) -> Tuple[int, str]:
    """Split a leading number off *s*.

    Returns the number (1 when *s* does not start with digits) and the
    rest of the string.

    >>> strip_number('10AB2')
    (10, 'AB2')
    >>> strip_number('AB2')
    (1, 'AB2')
    """
    match = re.match('[0-9]*', s)
    assert match is not None
    digits = match.group()
    number = int(digits) if digits else 1
    rest = s[match.end():]
    return number, rest
def tree2str(tree: Tree,
             sub1: str, sub2: str) -> str:
    """Render *tree* as text; helper for the html, latex and rest formats.

    Repeat counts other than one are written as ``sub1 + count + sub2``
    after the repeated part; a list of subtrees is wrapped in parentheses.
    """
    if isinstance(tree, str):
        return tree

    if not isinstance(tree, tuple):
        # A list of subtrees: render each one and group them.
        inner = ''.join(tree2str(child, sub1, sub2) for child in tree)
        return '(' + inner + ')'

    subtree, repeat = tree
    text = tree2str(subtree, sub1, sub2)
    if repeat != 1:
        return text + sub1 + str(repeat) + sub2
    # A group that is not repeated does not need its parentheses.
    if text[0] == '(' and text[-1] == ')':
        return text[1:-1]
    return text
def count_tree(tree: Tree) -> Dict[str, int]:
    """Count how many times each symbol occurs in *tree*.

    Keys appear in the order the symbols are first met in the tree.
    """
    if isinstance(tree, str):
        return {tree: 1}

    if isinstance(tree, tuple):
        # (subtree, repeat): scale the counts of the subtree.
        subtree, repeat = tree
        scaled = {}
        for symb, n in count_tree(subtree).items():
            scaled[symb] = n * repeat
        return scaled

    totals = {}  # type: Dict[str, int]
    for child in tree:
        for symb, n in count_tree(child).items():
            totals[symb] = totals.get(symb, 0) + n
    return totals
# Symbols treated as non-metals by the 'metal' format: non-metals,
# half-metals/metalloids, halogens and noble gases.
non_metals = [
    'H', 'He',
    'B', 'C', 'N', 'O', 'F', 'Ne',
    'Si', 'P', 'S', 'Cl', 'Ar',
    'Ge', 'As', 'Se', 'Br', 'Kr',
    'Sb', 'Te', 'I', 'Xe',
    'Po', 'At', 'Rn',
]
@lru_cache
def periodic_table_order() -> Dict[str, int]:
    """Return a symbol-to-rank dict used to sort in 'periodic' order.

    The rank is the position of the symbol in the concatenation of the
    ``chemical_symbols`` slices listed below, taken in that order; entry 0
    of ``chemical_symbols`` is left out.  The result is cached.
    """
    # (start, stop) slices of chemical_symbols, highest numbers first.
    bounds = [(87, None), (55, 87), (37, 55), (19, 37),
              (11, 19), (3, 11), (1, 3)]
    ordered = []
    for start, stop in bounds:
        ordered.extend(chemical_symbols[start:stop])
    return {symbol: rank for rank, symbol in enumerate(ordered)}
# Backwards compatibility:
[docs]
def formula_hill(numbers, empirical=False):
    """Convert list of atomic numbers to a chemical formula as a string.

    Elements are alphabetically ordered with C and H first.

    If argument `empirical` is true, element counts are divided by their
    greatest common divisor to yield an empirical formula.
    """
    symbols = []
    for Z in numbers:
        symbols.append(chemical_symbols[Z])
    formula = Formula('', _tree=[(symbols, 1)])
    if empirical:
        formula = formula.reduce()[0]
    return formula.format('hill')
# Backwards compatibility:
[docs]
def formula_metal(numbers, empirical=False):
    """Convert list of atomic numbers to a chemical formula as a string.

    Elements are alphabetically ordered with metals first.

    If argument `empirical` is true, element counts are divided by their
    greatest common divisor to yield an empirical formula.
    """
    symbols = []
    for Z in numbers:
        symbols.append(chemical_symbols[Z])
    formula = Formula('', _tree=[(symbols, 1)])
    if empirical:
        formula = formula.reduce()[0]
    return formula.format('metal')