import { escapeRegex } from '@luminovo/commons';
import { OtsFullPart } from '@luminovo/http-client';
import { AttributeExtractionRule } from '../types';

/**
 * Builds an extraction rule that looks for one of the expected parts in an item's content.
 *
 * The parts are checked in the order given. The first part whose MPN, or any of its
 * MPN aliases, matches `item.content` is reported as a single `part` attribute with
 * confidence 1. When no part matches, the rule yields an empty array.
 *
 * @param expectedParts parts whose MPNs (and aliases) should be searched for.
 * @returns a rule operating on items that expose a `content` string.
 */
export function ruleExpectedMpns(expectedParts: OtsFullPart[]): AttributeExtractionRule<{ content: string }> {
    return (item) => {
        // `find` stops at the first hit, so earlier parts take precedence over later ones.
        const matchedPart = expectedParts.find((candidatePart) => {
            const knownNames = [candidatePart.mpn].concat(candidatePart.mpn_aliases);
            return knownNames.some((name) => isMatch(name, item.content));
        });

        if (matchedPart === undefined) {
            return [];
        }

        return [
            {
                attr: 'part',
                value: matchedPart,
                confidence: 1,
            },
        ];
    };
}

/**
 * Removes every whitespace character (anything matched by `\s`) from the given string.
 *
 * @param mpn the raw text to normalise; despite the name it is also applied to content.
 * @returns the input with all whitespace stripped.
 */
function sanitizeMpn(mpn: string): string {
    const withoutWhitespace = mpn.replace(/\s/g, '');
    return withoutWhitespace;
}

/**
 * Generates the MPN candidates contained in a piece of text.
 *
 * The input is tokenised on runs of whitespace. The result contains every single
 * word followed by every concatenation of two or more consecutive words (joined
 * without a separator), e.g. `"A B C"` yields `["A", "B", "C", "AB", "ABC", "BC"]`.
 *
 * Any whitespace character (spaces, tabs, newlines, ...) separates words, and empty
 * tokens caused by leading, trailing or repeated whitespace are dropped, so the
 * result never contains empty strings or candidates with embedded whitespace.
 *
 * @param input the text to derive candidates from.
 * @returns the candidates: single words first, then the multi-word concatenations.
 */
export function generateMPNCandidates(input: string): string[] {
    // Split on any whitespace run rather than a literal ' ', so that words separated
    // by tabs or newlines are still recognised as separate words.
    const words = input.split(/\s+/).filter((word) => word.length > 0);
    const results: string[] = [...words];

    for (let i = 0; i < words.length; i++) {
        let combination = words[i];
        // Extend the run starting at word i one word at a time.
        for (let j = i + 1; j < words.length; j++) {
            combination += words[j];
            results.push(combination);
        }
    }

    return results;
}

/**
 * Decides whether `content` refers to the given MPN.
 *
 * Whitespace is ignored on both sides. A match is reported when either
 * - the whitespace-stripped MPN equals the whitespace-stripped content, or
 * - one of the candidates generated from the content equals the MPN
 *   (case-insensitively, optionally followed by a single trailing comma).
 *
 * An MPN that is empty after removing whitespace never matches.
 *
 * @param mpn the manufacturer part number (or alias) to look for.
 * @param content the text to search in.
 * @returns `true` if the content is considered to contain the MPN.
 */
function isMatch(mpn: string, content: string): boolean {
    const sanitizedMpn = sanitizeMpn(mpn);
    // A blank MPN would produce an empty regex that matches any content, and an
    // exact-candidate regex of `^,?$`, which matches empty or "," candidates.
    if (sanitizedMpn.length === 0) {
        return false;
    }

    const sanitizedContent = sanitizeMpn(content);
    if (sanitizedMpn === sanitizedContent) {
        return true;
    }

    const escapedMpn = escapeRegex(sanitizedMpn);
    const mpnPresentAsSubstringRegex = new RegExp(escapedMpn, 'i');
    // if the sanitized MPN is present as a substring, we want to guard against
    // matching longer MPNs that contain the sanitized MPN as a substring
    if (mpnPresentAsSubstringRegex.test(sanitizedContent)) {
        const mpnMatchesCandidateExactlyRegex = new RegExp(`^${escapedMpn},?$`, 'i');
        return generateMPNCandidates(content).some((combination) =>
            mpnMatchesCandidateExactlyRegex.test(sanitizeMpn(combination)),
        );
    }

    return false;
}
