/* Copyright (C) 2002-2007  Christoph Steinbeck <steinbeck@users.sf.net>
 *
 * Contact: cdk-devel@lists.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 * All we ask is that proper credit is given for our work, which includes
 * - but is not limited to - adding the above copyright notice to the beginning
 * of your source code files, and to any copyright notice that you may distribute
 * with programs based on this work.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 */
package org.openscience.cdk.fingerprint;

import org.openscience.cdk.aromaticity.Aromaticity;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.graph.PathTools;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.ringsearch.AllRingsFinder;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;

import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;

/**
 *  Generates a fingerprint for a given AtomContainer. Fingerprints are
 *  one-dimensional bit arrays, where bits are set according to the
 *  occurrence of a particular structural feature (See for example the
 *  Daylight inc. theory manual for more information). Fingerprints allow for
 *  a fast screening step to exclude candidates for a substructure search in a
 *  database. They are also a means for determining the similarity of chemical
 *  structures. <p>
 *
 *  A fingerprint is generated for an AtomContainer with this code: <pre>
 *   Molecule molecule = new Molecule();
 *   IFingerprinter fingerprinter = new Fingerprinter();
 *   IBitFingerprint fingerprint = fingerprinter.getBitFingerprint(molecule);
 *   fingerprint.size(); // returns 1024 by default
 *   fingerprint.length(); // returns the highest set bit
 * </pre> <p>
 *
 * <p>The FingerPrinter has the option to ignore explicit hydrogens
 * ({@link #setHashExplicitHydrogens(boolean)}) and pseudo atoms
 * ({@link #setHashPseudoAtoms(boolean)}). Both are ignored by default, which
 * ensures the fingerprint can be used for substructure screening.</p>
 *
 *  <span style="color: #FF0000;">Warning: The aromaticity detection for this
 *  FingerPrinter relies on AllRingsFinder, which is known to take very long
 *  for some molecules with many cycles or special cyclic topologies. Thus,
 *  the AllRingsFinder has a built-in timeout of 5 seconds after which it
 *  aborts and throws an Exception. If you want your fingerprint generated at
 *  any expense, you need to create your own AllRingsFinder, set the timeout
 *  to a higher value, and assign it to this FingerPrinter. In the vast
 *  majority of cases, however, the defaults will be fine. </span> <p>
 *
 *  <span style="color: #FF0000;">Another Warning : The daylight manual says:
 *  "Fingerprints are not so definite: if a fingerprint indicates a pattern is
 *  missing then it certainly is, but it can only indicate a pattern's presence
 *  with some probability." In the case of very small molecules, the
 *  probability that you get the same fingerprint for different molecules is
 *  high. </span>
 *  </p>
 *
 * @author         steinbeck
 * @cdk.created    2002-02-24
 * @cdk.keyword    fingerprint
 * @cdk.keyword    similarity
 */
public class Fingerprinter extends AbstractFingerprinter implements IFingerprinter {

    /**
     * Default value of the path limit: traversal throws an exception once
     * more than this many paths have been generated for a single start atom.
     */
    private static final int DEFAULT_PATH_LIMIT = 42000;

    /** The default length of created fingerprints. */
    public static final int DEFAULT_SIZE = 1024;

    /** The default search depth (number of bonds) used to create the fingerprints. */
    public static final int DEFAULT_SEARCH_DEPTH = 7;

    /** Logger shared by all instances of this class. */
    private static final ILoggingTool logger =
            LoggingToolFactory.createLoggingTool(Fingerprinter.class);

    /** Length of the fingerprints produced by this instance. */
    private final int size;

    /** Maximum path length, in bonds, encoded by this instance. */
    private final int searchDepth;

    /** Active per-start-atom path limit, see {@link #setPathLimit(int)}. */
    private int pathLimit = DEFAULT_PATH_LIMIT;

    /** Encode paths which pass through a pseudo atom - default: false. */
    private boolean hashPseudoAtoms = false;

    /**
     * Encode paths which pass through a hydrogen atom. Hydrogens may be
     * implicit or explicit which will result in different FPs - default:
     * false.
     */
    private boolean hashExplHydrogens = false;

    /**
     * Creates a fingerprint generator of length {@link #DEFAULT_SIZE}
     * and with a search depth of {@link #DEFAULT_SEARCH_DEPTH}.
     */
    public Fingerprinter() {
        this(DEFAULT_SIZE, DEFAULT_SEARCH_DEPTH);
    }

    /**
     * Creates a fingerprint generator that produces fingerprints of the given
     * size with a search depth of {@link #DEFAULT_SEARCH_DEPTH}.
     *
     * @param size the desired size of the fingerprint
     */
    public Fingerprinter(int size) {
        this(size, DEFAULT_SEARCH_DEPTH);
    }

    /**
     * Constructs a fingerprint generator that creates fingerprints of
     * the given size, using a generation algorithm with the given search
     * depth. Both values are stored as given; no validation is performed
     * here.
     *
     * @param  size        The desired size of the fingerprint
     * @param  searchDepth The desired depth of search (number of bonds)
     */
    public Fingerprinter(int size, int searchDepth) {
        this.size = size;
        this.searchDepth = searchDepth;

    }

    /**
     * Lists the settings of this fingerprinter as name/value string pairs,
     * in the order: searchDepth, pathLimit, hashPseudoAtoms,
     * hashExplicitHydrogens.
     *
     * @return a fixed-size list of the parameter entries
     */
    @Override
    protected List<Map.Entry<String, String>> getParameters() {
        final Map.Entry<String, String> depth =
            new SimpleImmutableEntry<>("searchDepth", String.valueOf(searchDepth));
        final Map.Entry<String, String> limit =
            new SimpleImmutableEntry<>("pathLimit", String.valueOf(pathLimit));
        final Map.Entry<String, String> pseudo =
            new SimpleImmutableEntry<>("hashPseudoAtoms", String.valueOf(hashPseudoAtoms));
        final Map.Entry<String, String> hydrogens =
            new SimpleImmutableEntry<>("hashExplicitHydrogens", String.valueOf(hashExplHydrogens));
        return Arrays.asList(depth, limit, pseudo, hydrogens);
    }

    /**
     * Generates a bit fingerprint for the given AtomContainer, using the size
     * and search depth this instance was constructed with.
     *
     * <p>Unless the container has a pseudo atom, atom types are perceived and
     * configured and the {@code Aromaticity.cdkLegacy()} model is applied to
     * the passed container before the paths are encoded (both calls receive
     * the container itself, so it is presumably modified by them).
     *
     * @param container The AtomContainer for which a Fingerprint is generated
     * @param ringFinder An instance of
     *                   {@link org.openscience.cdk.ringsearch.AllRingsFinder};
     *                   not used by this implementation, may be {@code null}
     * @exception CDKException if atom typing or aromaticity perception fails,
     *                         or if too many paths are generated
     * @return the fingerprint, backed by a {@link BitSet}
     */
    public IBitFingerprint getBitFingerprint(IAtomContainer container, AllRingsFinder ringFinder) throws CDKException {
        logger.debug("Entering Fingerprinter");
        logger.debug("Starting Aromaticity Detection");
        final long started = System.currentTimeMillis();
        final boolean pseudo = hasPseudoAtom(container.atoms());
        if (!pseudo) {
            AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(container);
            Aromaticity.cdkLegacy().apply(container);
        }
        final long elapsed = System.currentTimeMillis() - started;
        logger.debug("time for aromaticity calculation: " + elapsed + " milliseconds");
        logger.debug("Finished Aromaticity Detection");

        final BitSet bits = new BitSet(size);
        encodePaths(container, searchDepth, bits, size);
        return new BitSetFingerprint(bits);
    }

    /**
     * Generates a fingerprint for the given AtomContainer, using the size and
     * search depth of this instance. Delegates to
     * {@link #getBitFingerprint(IAtomContainer, AllRingsFinder)} with a
     * {@code null} ring finder.
     *
     * @param container The AtomContainer for which a Fingerprint is generated
     * @return the fingerprint
     * @throws CDKException propagated from the delegate
     */
    @Override
    public IBitFingerprint getBitFingerprint(IAtomContainer container) throws CDKException {
        return getBitFingerprint(container, null);
    }

    /**
     * {@inheritDoc}
     *
     * <p>The returned map is keyed on the encoded path string and holds the
     * number of times that path was stored. As for the bit fingerprint, atom
     * typing and aromaticity perception are run on the container first unless
     * it has a pseudo atom.
     */
    @Override
    public Map<String, Integer> getRawFingerprint(IAtomContainer container) throws CDKException {
        final boolean pseudo = hasPseudoAtom(container.atoms());
        if (!pseudo) {
            AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(container);
            Aromaticity.cdkLegacy().apply(container);
        }

        final Map<String, Integer> counts = new HashMap<>();
        // the bit set is required by State but its content is discarded here
        final State state = new State(container, new BitSet(size), size, searchDepth + 1);
        state.setFeatureMap(counts);

        for (IAtom start : container.atoms()) {
            // the path limit applies per start atom
            state.numPaths = 0;
            state.visit(start);
            traversePaths(state, start, null);
            state.unvisit(start);
        }
        return counts;
    }

    /**
     * Generates a count fingerprint by wrapping the result of
     * {@link #getRawFingerprint(IAtomContainer)} in an
     * {@code IntArrayCountFingerprint}.
     *
     * @param container the AtomContainer for which a fingerprint is generated
     * @return the count fingerprint
     * @throws CDKException propagated from the raw fingerprint calculation
     */
    @Override
    public ICountFingerprint getCountFingerprint(IAtomContainer container) throws CDKException {
        return new IntArrayCountFingerprint(getRawFingerprint(container));
    }

    /**
     * Finds the first bond in the list that contains both atoms.
     *
     * @param bonds the bonds to search
     * @param beg   one atom of the bond
     * @param end   the other atom of the bond
     * @return the matching bond, or {@code null} if none contains both atoms
     */
    private IBond findBond(List<IBond> bonds, IAtom beg, IAtom end) {
        for (IBond candidate : bonds) {
            final boolean joinsBoth = candidate.contains(beg) && candidate.contains(end);
            if (joinsBoth) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Encodes a path of atoms as a string of alternating atom and bond
     * symbols, looking up the bond between consecutive atoms in the molecule.
     *
     * @param mol    the molecule the path belongs to
     * @param path   the atoms of the path, in order
     * @param buffer scratch buffer, cleared before use
     * @return the encoded path
     * @throws IllegalStateException if two consecutive atoms are not bonded
     */
    private String encodePath(IAtomContainer mol, List<IAtom> path, StringBuilder buffer) {
        buffer.setLength(0);
        IAtom current = path.get(0);
        buffer.append(getAtomSymbol(current));
        final int count = path.size();
        for (int pos = 1; pos < count; pos++) {
            final IAtom following = path.get(pos);
            final IBond link = findBond(mol.getConnectedBondsList(current), following, current);
            if (link == null) {
                throw new IllegalStateException("FATAL - Atoms in patch were connected?");
            }
            buffer.append(getBondSymbol(link))
                  .append(getAtomSymbol(following));
            current = following;
        }
        return buffer.toString();
    }

    /**
     * Encodes a path in the forward direction (a0 b0 a1 b1 ...) from parallel
     * lists of atoms and bonds, where bond {@code i} joins atoms {@code i}
     * and {@code i+1}.
     *
     * @param apath  atoms of the path
     * @param bpath  bonds of the path
     * @param buffer scratch buffer, cleared before use
     * @return the encoded path
     */
    private String encodePath(List<IAtom> apath, List<IBond> bpath, StringBuilder buffer) {
        buffer.setLength(0);
        buffer.append(getAtomSymbol(apath.get(0)));
        final int count = apath.size();
        for (int pos = 1; pos < count; pos++) {
            buffer.append(getBondSymbol(bpath.get(pos - 1)))
                  .append(getAtomSymbol(apath.get(pos)));
        }
        return buffer.toString();
    }

    /**
     * Encodes a path in the reverse direction from parallel lists of atoms
     * and bonds.
     *
     * <pre>
     * atoms=[0, 1, 2, 3], bonds=[0, 1, 2]
     * forward: a0 | b0 a1 b1 a2 b2 a3
     * reverse: a3 | b2 a2 b1 a1 b0 a0
     * </pre>
     *
     * @param apath  atoms of the path
     * @param bpath  bonds of the path
     * @param buffer scratch buffer, cleared before use
     * @return the encoded reversed path
     */
    private String encodeRevPath(List<IAtom> apath, List<IBond> bpath, StringBuilder buffer) {
        buffer.setLength(0);
        int idx = apath.size() - 1;
        buffer.append(getAtomSymbol(apath.get(idx)));
        while (--idx >= 0) {
            // bond idx joins atom idx+1 (already written) and atom idx
            buffer.append(getBondSymbol(bpath.get(idx)))
                  .append(getAtomSymbol(apath.get(idx)));
        }
        return buffer.toString();
    }

    /**
     * Folds every character of {@code str} into a running hash using the
     * same recurrence as {@link String#hashCode()} ({@code h = 31*h + c}).
     * Starting from 0 and appending the pieces of a path one after another
     * therefore yields the hash code of the concatenated path string, without
     * building that string.
     *
     * <p>Each character is read at its own index. Reading only the first
     * character made multi-character symbols hash as repetitions of their
     * first letter (e.g. "Fe" like "FF"); note that correcting this changes
     * the bit set for paths that contain such symbols.
     *
     * @param hash the hash accumulated so far
     * @param str  the text to append
     * @return the updated hash
     */
    private static int appendHash(int hash, String str) {
        final int len = str.length();
        for (int i = 0; i < len; i++)
            hash = 31 * hash + str.charAt(i);
        return hash;
    }

    /**
     * Hashes a path in the forward direction (a0 b0 a1 b1 ...) by appending
     * the atom and bond symbols to a hash that starts at 0.
     *
     * @param apath atoms of the path
     * @param bpath bonds of the path; bond {@code i} joins atoms {@code i}
     *              and {@code i+1}
     * @return the hash of the forward path
     */
    private int hashPath(List<IAtom> apath, List<IBond> bpath) {
        int hash = appendHash(0, getAtomSymbol(apath.get(0)));
        final int count = apath.size();
        for (int pos = 1; pos < count; pos++) {
            hash = appendHash(hash, getBondSymbol(bpath.get(pos - 1)));
            hash = appendHash(hash, getAtomSymbol(apath.get(pos)));
        }
        return hash;
    }

    /**
     * Hashes a path in the reverse direction (last atom first) by appending
     * the atom and bond symbols to a hash that starts at 0.
     *
     * @param apath atoms of the path
     * @param bpath bonds of the path; bond {@code i} joins atoms {@code i}
     *              and {@code i+1}
     * @return the hash of the reversed path
     */
    private int hashRevPath(List<IAtom> apath, List<IBond> bpath) {
        int idx = apath.size() - 1;
        int hash = appendHash(0, getAtomSymbol(apath.get(idx)));
        while (--idx >= 0) {
            hash = appendHash(hash, getBondSymbol(bpath.get(idx)));
            hash = appendHash(hash, getAtomSymbol(apath.get(idx)));
        }
        return hash;
    }

    /**
     * Mutable bookkeeping for one fingerprint calculation: the current path
     * (atoms and bonds), the set of atoms on it, the number of paths hashed
     * so far and the outputs (bit set and, optionally, a feature count map).
     */
    private final class State {
        /** Number of hashes added; reset by the caller for each start atom. */
        private int    numPaths = 0;
        /** Re-seeded with every path hash, so the chosen bit is a pure function of the hash. */
        private final Random rand     = new Random();
        private final BitSet fp;
        /** Optional path-string counts; {@code null} when only bits are wanted. */
        private Map<String,Integer> feats;
        private final IAtomContainer mol;
        private final Set<IAtom> visited = new HashSet<>();
        private final List<IAtom> apath = new ArrayList<>();
        private final List<IBond> bpath = new ArrayList<>();
        private final int maxDepth;
        private final int fpsize;
        /** Scratch buffer reused when encoding path strings. */
        public final StringBuilder buffer = new StringBuilder();

        /**
         * @param mol      molecule being fingerprinted
         * @param fp       bit set that receives the path bits
         * @param fpsize   number of bits to fold hashes into
         * @param maxDepth maximum number of atoms on a path
         */
        public State(IAtomContainer mol, BitSet fp, int fpsize, int maxDepth) {
            this.mol      = mol;
            this.fp       = fp;
            this.fpsize   = fpsize;
            this.maxDepth = maxDepth;
        }

        /** Enables path-string counting into the given map. */
        public void setFeatureMap(Map<String,Integer> feats) {
            this.feats = feats;
        }

        /** Returns the bonds connected to the atom in the molecule. */
        List<IBond> getBonds(IAtom atom) {
            return mol.getConnectedBondsList(atom);
        }

        /** Marks an atom as on the path; false if it was already marked. */
        boolean visit(IAtom a) {
            return visited.add(a);
        }

        /** Clears the mark of an atom; false if it was not marked. */
        boolean unvisit(IAtom a) {
            return visited.remove(a);
        }

        /** Extends the path by an atom and, unless null, the bond leading to it. */
        void push(IAtom atom, IBond bond) {
            apath.add(atom);
            if (bond != null) {
                bpath.add(bond);
            }
        }

        /** Removes the last atom and the last bond of the path, where present. */
        void pop() {
            final int lastAtom = apath.size() - 1;
            if (lastAtom >= 0) {
                apath.remove(lastAtom);
            }
            final int lastBond = bpath.size() - 1;
            if (lastBond >= 0) {
                bpath.remove(lastBond);
            }
        }

        /** Counts a path and sets the bit selected by its hash. */
        void addHash(int x) {
            numPaths++;
            // XXX: fp.set(x % size); would work just as well but would encode a
            //      different bit
            rand.setSeed(x);
            final int bit = rand.nextInt(fpsize);
            fp.set(bit);
        }

        /** Increments the count of an encoded path, if counting is enabled. */
        private void storeFeat(String path) {
            if (feats != null) {
                feats.merge(path, 1, Integer::sum);
            }
        }

        /** Stores the current path read front to back. */
        private void storeForward() {
            addHash(hashPath(apath, bpath));
            if (feats == null) {
                return; // skip the string encoding when nobody wants it
            }
            storeFeat(encodePath(apath, bpath, buffer));
        }

        /** Stores the current path read back to front. */
        private void storeReverse() {
            addHash(hashRevPath(apath, bpath));
            if (feats == null) {
                return; // skip the string encoding when nobody wants it
            }
            storeFeat(encodeRevPath(apath, bpath, buffer));
        }

        /**
         * Optimisation - every path is reached twice, once from each end, but
         * only needs to be stored once. The traversal whose start atom has
         * the smaller identity hash code is the one that stores the path. We
         * could use the atom index instead, but since that may be a linear
         * time lookup (at least in the old IAtomContainer implementation) we
         * use the identity hash code.
         *
         * <p>NOTE: the comparison is strict, so if both end atoms ever had
         * the same identity hash code the path would be skipped from both
         * ends.
         *
         * @return true - do encode/false - skip encoding
         */
        public boolean isOrderedPath() {
            final int first = System.identityHashCode(apath.get(0));
            final int last  = System.identityHashCode(apath.get(apath.size() - 1));
            return first < last;
        }

        /**
         * Stores the current path. A single atom is stored by its symbol; a
         * longer path is stored once (see {@link #isOrderedPath()}) in the
         * direction selected by comparing it with its own reversal.
         */
        public void storePath() {
            if (bpath.isEmpty()) {
                final String symbol = getAtomSymbol(apath.get(0));
                addHash(symbol.hashCode());
                storeFeat(symbol);
                return;
            }
            if (!isOrderedPath()) {
                return;
            }
            if (compare(apath, bpath) < 0) {
                storeReverse();
            } else {
                storeForward();
            }
        }
    }

    /**
     * Depth-first enumeration of all simple paths that extend the current
     * path with {@code beg}. Each path is stored as it is reached and the
     * traversal stops extending once the maximum number of atoms is on the
     * path.
     *
     * @param state traversal state; the caller must already have marked
     *              {@code beg} as visited
     * @param beg   atom to add to the path
     * @param prev  bond that led to {@code beg}, {@code null} for a start atom
     * @throws CDKException if more paths than the path limit were generated
     */
    private void traversePaths(State state, IAtom beg, IBond prev) throws CDKException {
        if (skipAtom(beg)) {
            return;
        }

        state.push(beg, prev);
        state.storePath();
        if (state.numPaths > pathLimit) {
            throw new CDKException("Too many paths! Structure is likely a cage, reduce path length or increase path limit");
        }

        final boolean canExtend = state.apath.size() < state.maxDepth;
        if (canExtend) {
            for (IBond bond : state.getBonds(beg)) {
                if (!bond.equals(prev)) {
                    final IAtom next = bond.getOther(beg);
                    if (state.visit(next)) {
                        traversePaths(state, next, bond);
                        state.unvisit(next); // traverse all paths
                    }
                }
            }
        }

        state.pop();
    }

    /**
     * Decides whether paths through the atom are left out of the fingerprint:
     * pseudo atoms unless pseudo atom hashing is on, hydrogens unless
     * explicit hydrogen hashing is on.
     *
     * @param beg the atom to test
     * @return true if the atom must not be part of a path
     */
    private boolean skipAtom(IAtom beg) {
        final int elem = getElem(beg);
        if (elem == IAtom.Wildcard && !hashPseudoAtoms) {
            return true;
        }
        return elem == IAtom.H && !hashExplHydrogens;
    }

    /**
     * Get the hashes of all paths of lengths 0 to the specified length.
     *
     * This method will find all paths up to length N starting from each
     * atom in the molecule, drop those running through pseudo atoms or
     * hydrogens (unless hashing of these is enabled) and return the unique
     * set of hashes of the remaining paths.
     *
     * @param container The molecule to search
     * @param searchDepth The maximum path length desired
     * @return the distinct path hashes, in no particular order
     * @throws CDKException propagated from the path search (presumably when
     *                      the path limit is exceeded)
     * @deprecated Use {@link #encodePaths(IAtomContainer, int, BitSet, int)}
     */
    @Deprecated
    protected int[] findPathes(IAtomContainer container, int searchDepth) throws CDKException {
        final Set<Integer> unique = new HashSet<>();
        final StringBuilder scratch = new StringBuilder();

        for (IAtom start : container.atoms()) {
            final List<List<IAtom>> paths =
                PathTools.getLimitedPathsOfLengthUpto(container, start, searchDepth, pathLimit);
            for (List<IAtom> path : paths) {
                if (!hashPseudoAtoms && hasPseudoAtom(path)) {
                    continue;
                }
                if (!hashExplHydrogens && hasExplHydrogen(path)) {
                    continue;
                }
                unique.add(encodeUniquePath(container, path, scratch));
            }
        }

        final int[] result = new int[unique.size()];
        int next = 0;
        for (int hash : unique) {
            result[next] = hash;
            next++;
        }
        return result;
    }

    /**
     * Encodes all paths of up to {@code depth} bonds into the bit set, using
     * every atom of the molecule in turn as the start atom. The path counter
     * is reset for each start atom, so the path limit applies per atom.
     *
     * @param mol   the molecule to encode
     * @param depth maximum path length in bonds
     * @param fp    bit set that receives the path bits
     * @param size  number of bits the hashes are folded into
     * @throws CDKException if too many paths are generated for an atom
     */
    protected void encodePaths(IAtomContainer mol, int depth, BitSet fp, int size) throws CDKException {
        final int maxAtoms = depth + 1; // a path of n bonds has n+1 atoms
        final State state = new State(mol, fp, size, maxAtoms);
        for (IAtom start : mol.atoms()) {
            state.numPaths = 0;
            state.visit(start);
            traversePaths(state, start, null);
            state.unvisit(start);
        }
    }


    /**
     * Tests whether any of the atoms is a pseudo atom, i.e. its element
     * number (0 when unset) equals {@code IAtom.Wildcard}.
     *
     * @param path the atoms to test
     * @return true if at least one pseudo atom was found
     */
    private static boolean hasPseudoAtom(Iterable<IAtom> path) {
        for (IAtom candidate : path) {
            final boolean pseudo = getElem(candidate) == IAtom.Wildcard;
            if (pseudo) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tests whether any of the atoms is a hydrogen, i.e. its element number
     * equals {@code IAtom.H}.
     *
     * @param path the atoms to test
     * @return true if at least one hydrogen was found
     */
    private static boolean hasExplHydrogen(Iterable<IAtom> path) {
        for (IAtom candidate : path) {
            final boolean hydrogen = getElem(candidate) == IAtom.H;
            if (hydrogen) {
                return true;
            }
        }
        return false;
    }

    /**
     * Computes a direction independent hash for a path. A single atom is
     * hashed by its symbol. Otherwise the path is encoded forwards and
     * backwards and the hash code of the string that does not sort before
     * the other is returned. The path list is reversed temporarily and
     * restored before returning.
     *
     * @param container the molecule the path belongs to
     * @param path      the atoms of the path
     * @param buffer    scratch buffer used for the encoding
     * @return the hash of the path
     */
    private int encodeUniquePath(IAtomContainer container,
                                 List<IAtom> path,
                                 StringBuilder buffer) {
        if (path.size() == 1) {
            return getAtomSymbol(path.get(0)).hashCode();
        }

        final String forward = encodePath(container, path, buffer);
        Collections.reverse(path);
        final String reverse = encodePath(container, path, buffer);
        Collections.reverse(path); // restore the caller's ordering

        final boolean reverseIsLower = reverse.compareTo(forward) < 0;
        return reverseIsLower ? forward.hashCode() : reverse.hashCode();
    }

    /**
     * Compares two atoms by the lexicographical order of their symbols.
     * Atoms with the same element number always compare as equal, without
     * looking at the symbols.
     *
     * @param a atom a
     * @param b atom b
     * @return comparison &lt;0 a is less than b, &gt;0 a is more than b,
     *         0 same element
     */
    private static int compare(IAtom a, IAtom b) {
        final boolean sameElement = getElem(a) == getElem(b);
        return sameElement ? 0 : getAtomSymbol(a).compareTo(getAtomSymbol(b));
    }

    /**
     * Compares two bonds by the lexicographical order of their bond symbols,
     * as returned by {@link #getBondSymbol(IBond)}.
     *
     * @param a bond a
     * @param b bond b
     * @return comparison &lt;0 a is less than b, &gt;0 a is more than b,
     *         0 same symbol
     */
    private int compare(IBond a, IBond b) {
        return getBondSymbol(a).compareTo(getBondSymbol(b));
    }

    /**
     * Compares a path of atoms with itself read backwards to give the
     * lexicographically lowest traversal (forwards or backwards). The end
     * atoms are compared first, then bonds and atoms are compared pairwise
     * walking inwards from both ends; the first difference decides.
     *
     * @param apath path of atoms
     * @param bpath path of bonds
     * @return &lt;0 forward is lower &gt;0 reverse is lower, 0 if no
     *         difference was found
     */
    private int compare(List<IAtom> apath, List<IBond> bpath) {
        final int last = apath.size() - 1;
        int result = compare(apath.get(0), apath.get(last));
        if (result != 0) {
            return result;
        }
        // fwd walks the path from the front, rev from the back; bond fwd-1
        // precedes atom fwd, bond rev follows atom rev
        for (int fwd = 1, rev = last - 1; rev != 0; fwd++, rev--) {
            result = compare(bpath.get(fwd - 1), bpath.get(rev));
            if (result != 0) {
                return result;
            }
            result = compare(apath.get(fwd), apath.get(rev));
            if (result != 0) {
                return result;
            }
        }
        return 0;
    }

    /**
     * Returns the atomic number of the atom, treating an unset (null)
     * atomic number as 0.
     *
     * @param atom the atom
     * @return the atomic number, or 0 if it is not set
     */
    private static int getElem(IAtom atom) {
        final Integer number = atom.getAtomicNumber();
        return number != null ? number : 0;
    }

    /**
     * Returns the symbol used to encode an atom in a path. A fixed set of
     * elements is mapped to single letter codes, all other atoms use their
     * own symbol.
     *
     * @param atom the atom
     * @return the symbol used in path encodings
     */
    private static String getAtomSymbol(IAtom atom) {
        // XXX: backwards compatibility
        // The single letter codes look arbitrary. The intention seems to be
        // that paths used to be reversed by string manipulation to
        // de-duplicate them (only the lexicographically lowest is stored),
        // which does not work for symbols of more than one letter:
        // e.g. Fe-C => C-eF vs C-Fe => eF-C
        // As a dirty hack "common" symbols were replaced with single letter
        // equivalents so the reversing is less wrong.
        final int elem = getElem(atom);
        switch (elem) {
            case 0:  return "*"; // pseudo atom
            case 3:  return "L"; // Li
            case 6:  return "C"; // C
            case 7:  return "N"; // N
            case 8:  return "O"; // O
            case 11: return "G"; // Na
            case 13: return "A"; // Al
            case 14: return "Y"; // Si
            case 17: return "X"; // Cl
            case 20: return "J"; // Ca
            case 33: return "D"; // As
            case 34: return "E"; // Se
            case 35: return "Z"; // Br
            default:
                return atom.getSymbol();
        }
    }

    /**
     * Returns the symbol used to encode a bond in a path: ":" for an
     * aromatic bond, otherwise "-", "=" or "#" for a single, double or
     * triple bond. Any other bond order, including an unset (null) order,
     * is encoded as the empty string.
     *
     * @param  bond  the bond to encode
     * @return       the bond symbol, never {@code null}
     */
    protected String getBondSymbol(IBond bond) {
        if (bond.isAromatic())
            return ":";
        final IBond.Order order = bond.getOrder();
        // switching on a null enum value throws a NullPointerException, so an
        // unset order is handled like the other orders that have no symbol
        if (order == null)
            return "";
        switch (order) {
            case SINGLE:
                return "-";
            case DOUBLE:
                return "=";
            case TRIPLE:
                return "#";
            default:
                return "";
        }
    }

    /**
     * Sets the maximum number of paths that may be generated per start atom.
     * When the limit is exceeded fingerprint generation fails with a
     * {@code CDKException}.
     *
     * @param limit the new path limit
     */
    public void setPathLimit(int limit) {
        this.pathLimit = limit;
    }

    /**
     * Include pseudo/query atoms (atomic number 0) in the fingerprint.
     * Path based fingerprints are most useful for substructure screening,
     * and for that purpose this is generally not wanted.
     *
     * @param value the setting (false by default)
     */
    public void setHashPseudoAtoms(boolean value) {
        this.hashPseudoAtoms = value;
    }

    /**
     * Include explicit hydrogen atoms in the fingerprint. This means you
     * get a different fingerprint depending on whether hydrogens are
     * implicit or explicit. Path based fingerprints are most useful for
     * substructure screening, and for that purpose this is generally not
     * wanted.
     *
     * @param value the setting (false by default)
     */
    public void setHashExplicitHydrogens(boolean value) {
        this.hashExplHydrogens = value;
    }

    /**
     * Returns the search depth this fingerprinter was constructed with.
     *
     * @return the search depth (number of bonds)
     */
    public int getSearchDepth() {
        return searchDepth;
    }

    /**
     * Returns the fingerprint size this fingerprinter was constructed with.
     *
     * @return the size of the generated fingerprints
     */
    @Override
    public int getSize() {
        return size;
    }
}
