import { AzureExtractionResult } from '@luminovo/http-client';
import { Attribute, Extractor, Region } from '../types';

/**
 * Runs every extractor over an Azure extraction result and collects the
 * regions they produce, with each region's box rescaled relative to its page.
 *
 * For each extractor (processed sequentially, in the given order), every item
 * yielded by `extractor.generator` is turned into regions via
 * `extractor.extractRegion`. Each of the extractor's `extractionRules` is then
 * applied to the `(item, region)` pair and the resulting attributes are
 * appended to the region's existing attributes.
 *
 * Finally each region's `box` is scaled by the reciprocal of the width and
 * height of the page it belongs to (matched on `pageNumber`), so that
 * positions are expressed relative to the page size instead of in the
 * absolute unit used by the source document.
 *
 * @param azureExtractionResult - The raw result handed to each extractor's
 *   generator; its `analyzeResult.pages` supply the page dimensions.
 * @param options - `extractors`: the extractors to run.
 * @returns The regions from all extractors, in production order, with
 *   page-relative boxes.
 * @throws Error if a region refers to a `pageNumber` that is not present in
 *   `azureExtractionResult.analyzeResult.pages`.
 */
export async function extract<TAttribute extends Attribute>(
    azureExtractionResult: AzureExtractionResult,
    { extractors }: { extractors: Extractor<TAttribute>[] },
): Promise<Region<TAttribute>[]> {
    const result: Region<TAttribute>[] = [];

    for (const extractor of extractors) {
        for await (const item of extractor.generator(azureExtractionResult)) {
            const regions = extractor.extractRegion(item).map((region) => {
                const attributes = extractor.extractionRules.flatMap((rule) => rule(item, region));

                return {
                    ...region,
                    attributes: region.attributes.concat(attributes),
                };
            });
            // Push one by one instead of spreading: spreading a very large
            // array into call arguments can exceed the engine's argument limit.
            for (const region of regions) {
                result.push(region);
            }
        }
    }

    const pages = azureExtractionResult.analyzeResult.pages;
    type Page = (typeof pages)[number];

    // Index the pages once so that the lookup per region is O(1).
    // Only the first page for a given number is kept, matching `Array.find`.
    const pagesByNumber = new Map<Page['pageNumber'], Page>();
    for (const page of pages) {
        if (!pagesByNumber.has(page.pageNumber)) {
            pagesByNumber.set(page.pageNumber, page);
        }
    }

    // Convert the absolute positions to relative positions
    // This is necessary because the PDF's polygons are expressed in inches
    const withRelativePositions = result.map((region) => {
        const page = pagesByNumber.get(region.pageNumber);
        if (page === undefined) {
            // Fail loudly with context rather than with an opaque TypeError.
            throw new Error(
                `Cannot compute relative position: no page with pageNumber ${String(
                    region.pageNumber,
                )} in the extraction result`,
            );
        }
        const box = region.box.applyViewbox({ width: 1 / page.width, height: 1 / page.height });
        return {
            ...region,
            box,
        };
    });

    return withRelativePositions;
}
