import { AzureExtractionResult } from '@luminovo/http-client';
import { BoundingBox } from '../boundingBox';
import { generateId } from '../generateId';
import { Attribute, AttributeExtractionRule, ExtractionItem, Extractor } from '../types';

/**
 * Creates an extractor that reads tables as key/value pairs: for each row, the
 * cell in the first column (`columnIndex === 0`) is used as the `context` (key)
 * and every non-header cell of that row is emitted as a `value`.
 *
 * Cells in the first row (`rowIndex === 0`) and cells whose `kind` is
 * `'columnHeader'` are never emitted as values. Rows that have no cell in the
 * first column produce no items.
 *
 * NOTE(review): the first-column cell itself is also emitted, paired with
 * itself as context. This is preserved from the previous implementation —
 * confirm that it is intended.
 *
 * @param extractionRules - Rules exposed unchanged on the returned extractor.
 * @returns An extractor with a `generator` yielding one item per value cell,
 * an `extractRegion` mapping an item's value cell to a single region, and the
 * given `extractionRules`.
 */
export function extractTableKeyValuePairs<TAttribute extends Attribute>({
    extractionRules,
}: {
    extractionRules: AttributeExtractionRule<TAttribute>[];
}): Extractor<TAttribute> {
    return {
        generator: async function* (azureExtractionResult: AzureExtractionResult): AsyncGenerator<ExtractionItem> {
            for (const table of azureExtractionResult.analyzeResult.tables) {
                // Index the first-column cell of each row once per table, so the
                // per-cell lookup below is O(1) instead of re-scanning all cells.
                // Only the first match per row is kept, mirroring `Array.prototype.find`.
                const contextByRow = new Map<number, (typeof table.cells)[number]>();
                for (const candidate of table.cells) {
                    if (candidate.columnIndex === 0 && !contextByRow.has(candidate.rowIndex)) {
                        contextByRow.set(candidate.rowIndex, candidate);
                    }
                }

                for (const cell of table.cells) {
                    // Skip header row
                    if (cell.rowIndex === 0 || cell.kind === 'columnHeader') {
                        continue;
                    }

                    const context = contextByRow.get(cell.rowIndex);

                    // Without a first-column cell there is no key to pair the value with.
                    if (!context) {
                        continue;
                    }

                    yield {
                        // Confidence is a fixed constant; no scoring is done here.
                        confidence: 1,
                        context,
                        value: cell,
                    };
                }
            }
        },

        // Maps the value cell of an item to exactly one region.
        extractRegion: ({ value }) => {
            return [
                {
                    id: generateId(value),
                    content: value.content,
                    // NOTE(review): assumes the cell has at least one bounding region — TODO confirm.
                    pageNumber: value.boundingRegions[0].pageNumber,
                    box: BoundingBox.fromPolygons(value.boundingRegions),
                    attributes: [],
                },
            ];
        },

        extractionRules,
    };
}
