import * as cheerio from "cheerio";
import nlp from "compromise";
import {GeneralScore} from "../../../elements/scoreIndicator/ScoreIndicator";

// Based on: https://yoast.com/how-to-use-headings-on-your-site/

/** A stretch of content grouped under a single heading, or under none at all. */
export type Section = {
    /** Heading text of the section; `undefined` when no heading is used, e.g. for an introduction. */
    heading?: string;
    /** Number of words counted in the section's content. */
    wordCount: number;
};

/** Outcome of the subheading distribution check. */
export type SubheadingDistributionScore = {
    /** Overall rating for the analysed content. */
    score: GeneralScore;
    /** The sections the content was split into, each with its word count. */
    sections: Section[];
};

/** Selector matching every heading level (h1 through h6). */
const headings = ["h1", "h2", "h3", "h4", "h5", "h6"].join(", ");

/** Word count threshold for a section: sections with this many words or more are rated "bad". */
export const maxWordsPerSection = 300;

/**
 * Builds a `Section` record for a piece of text.
 *
 * @param content - Text whose words are counted with compromise (`nlp`).
 * @param heading - Heading the text belongs to; omitted for text without a heading.
 * @returns A `Section` holding the given heading and the word count of `content`.
 */
const countWords = (content: string, heading?: string): Section => {
    const wordCount = nlp(content).wordCount();
    return {heading, wordCount};
};

/**
 * Measures the content that precedes the first heading at the top level of the document body.
 *
 * @param $ - Cheerio instance for the parsed document. It is expected to contain a `<body>`
 *            element, which `cheerio.load` creates in its default document mode.
 * @returns `null` when the body starts with a heading element; otherwise a heading-less
 *          `Section` with the word count of the first node plus its following siblings up to
 *          (not including) the first heading.
 */
const getInitialSection = ($: cheerio.CheerioAPI): (Section | null) => {
    // Select the body explicitly instead of by its position among all elements: anything the
    // parser places in <head> (e.g. <title>, <style>, <meta>) would shift a positional lookup
    // and make the initial section be computed from the wrong node.
    // `contents()` rather than `children()` so that a leading bare text node is picked up too.
    const firstNodeInContent = $("body").contents().first();
    if (firstNodeInContent.is(headings)) {
        return null;
    }

    // The document does not open with a heading, so the text in front of the first heading
    // forms an unnamed introductory section.
    const textBeforeHeading = firstNodeInContent.text() + firstNodeInContent.nextUntil(headings).text();
    return countWords(textBeforeHeading);
};

/**
 * Splits HTML content into sections delimited by headings (h1-h6) and rates whether any
 * section is too long.
 *
 * @param htmlContent - HTML markup to analyse.
 * @returns The sections found - an optional heading-less introduction followed by one entry per
 *          heading - together with a score that is "bad" when at least one section contains
 *          `maxWordsPerSection` words or more, and "good" otherwise.
 */
export const checkSubheadingDistribution = (htmlContent: string): SubheadingDistributionScore => {
    const $ = cheerio.load(htmlContent);

    // Pad every element with a trailing space so that .text() does not glue together words
    // that live in adjacent elements.
    $("*").append(" ");

    const sections: Section[] = [];

    const initialSection = getInitialSection($);
    if (initialSection !== null) {
        sections.push(initialSection);
    }

    $(headings).each((_index, headingEl) => {
        const currentHeading = $(headingEl);
        // The padding above also lands inside the heading element itself; normalise whitespace
        // so the reported heading name has no trailing or doubled spaces.
        const sectionName = currentHeading.text().replace(/\s+/g, " ").trim();
        // A section runs from its heading up to the next sibling heading of any level.
        const sectionContent = currentHeading.nextUntil(headings).text();
        sections.push(countWords(sectionContent, sectionName));
    });

    const hasOverlongSection = sections.some(section => section.wordCount >= maxWordsPerSection);
    return {
        score: hasOverlongSection ? "bad" : "good",
        sections,
    };
};
