import pronunciationDictionary from '../../actions/pronunciationDictionary';

/**
 * Collapse a character-level timing alignment into word-level timings.
 *
 * `<phoneme ...>...</phoneme>` markup is removed first (via
 * `stripPhonemesFromCharacters`), then consecutive non-whitespace characters
 * are joined into words. A word starts at the start time of its first
 * character and ends at the end time of its last character.
 *
 * @param {{characters: string[], character_start_times_seconds: number[], character_end_times_seconds: number[]}} response
 *   Parallel arrays: one entry per character, with that character's start and end time.
 * @param {number} [offset=0] Value added to every returned start and end time
 *   (presumably seconds, to match the input arrays - confirm with callers).
 * @returns {Array<{word: string, start_time: number, end_time: number}>}
 *   Words in order of appearance; empty when there are no non-whitespace characters.
 */
export function groupCharactersIntoWords(response, offset = 0) {
    // Strip phoneme tags first so tag text never ends up inside a word,
    // while the characters wrapped by the tags keep their timings.
    const {
        characters,
        character_start_times_seconds: startTimes,
        character_end_times_seconds: endTimes,
    } = stripPhonemesFromCharacters(response);

    // Any whitespace separates words. Matching only ' ' and '\n' would leave
    // '\r' (from "\r\n" line endings) and '\t' glued onto the neighbouring word.
    const WHITESPACE = /^\s+$/;

    const words = [];
    let currentWord = '';
    let wordStartTime = null;
    let wordEndTime = null;

    // Emit the word accumulated so far (if any) and reset the accumulator.
    const flushWord = () => {
        if (currentWord === '') {
            return;
        }
        words.push({
            word: currentWord,
            start_time: wordStartTime + offset,
            end_time: wordEndTime + offset,
        });
        currentWord = '';
        wordStartTime = null;
        wordEndTime = null;
    };

    for (let i = 0; i < characters.length; i += 1) {
        const char = characters[i];

        if (WHITESPACE.test(char)) {
            flushWord();
            continue;
        }

        // The first character of a word fixes the word's start time.
        if (currentWord === '') {
            wordStartTime = startTimes[i];
        }
        currentWord += char;
        // Keep extending the end time; the last character wins.
        wordEndTime = endTimes[i];
    }

    // The input may end without trailing whitespace; emit the pending word.
    flushWord();

    return words;
}

/**
 * Remove `<phoneme ...>` / `</phoneme ...>` tag markup from a character-level
 * alignment while keeping the characters wrapped by the tags, together with
 * their original start and end times.
 *
 * Characters outside any phoneme tag are copied through unchanged. If an
 * opening tag is never terminated by `>`, everything from that tag onward is
 * dropped; if a closing tag is missing, the rest of the input is kept as tag
 * content.
 *
 * @param {{characters: string[], character_start_times_seconds: number[], character_end_times_seconds: number[]}} response
 *   Parallel per-character arrays.
 * @returns {{characters: string[], character_start_times_seconds: number[], character_end_times_seconds: number[]}}
 *   New parallel arrays containing only the retained characters and their timings.
 */
function stripPhonemesFromCharacters(response) {
    const {
        characters,
        character_start_times_seconds: sourceStarts,
        character_end_times_seconds: sourceEnds,
    } = response;

    const OPEN_TAG = '<phoneme';
    const CLOSE_TAG = '</phoneme';

    const keptChars = [];
    const keptStarts = [];
    const keptEnds = [];

    // True when the entries starting at `pos` spell out `tagPrefix`.
    const tagBeginsAt = (pos, tagPrefix) =>
        characters[pos] === '<' &&
        characters.slice(pos, pos + tagPrefix.length).join('') === tagPrefix;

    // Index just past the next '>' at or after `pos` (may run past the end).
    const indexAfterTagEnd = (pos) => {
        let cursor = pos;
        while (cursor < characters.length && characters[cursor] !== '>') {
            cursor++;
        }
        return cursor + 1;
    };

    // Copy the character at `pos`, along with its timings, to the output.
    const keep = (pos) => {
        keptChars.push(characters[pos]);
        keptStarts.push(sourceStarts[pos]);
        keptEnds.push(sourceEnds[pos]);
    };

    let pos = 0;
    while (pos < characters.length) {
        // Ordinary text: pass it straight through.
        if (!tagBeginsAt(pos, OPEN_TAG)) {
            keep(pos);
            pos++;
            continue;
        }

        // Drop the opening tag, attributes included.
        pos = indexAfterTagEnd(pos);

        // Keep the tag's content up to the closing tag (or the end of input).
        while (pos < characters.length && !tagBeginsAt(pos, CLOSE_TAG)) {
            keep(pos);
            pos++;
        }

        // Drop the closing tag when one was actually found.
        if (pos < characters.length) {
            pos = indexAfterTagEnd(pos);
        }
    }

    return {
        characters: keptChars,
        character_start_times_seconds: keptStarts,
        character_end_times_seconds: keptEnds,
    };
}

// function isWordCharacter(char) {
//     return /[a-zA-Z0-9''\u2019\-\%\—]/.test(char);  // Added em dash (—) to valid word characters
// }

// function isPunctuation(char) {
//     return /[.,!?;:]/.test(char);  // Removed em dash from punctuation
// }

// function pushWordToList(words, word, startTime, endTime, offset) {
//     word = word.replace(/([a-zA-Z])\.{3}$/, '$1,'); // replace '...' with ','

//     // Separate base word, suffix, and trailing punctuation
//     let baseWord = word;
//     let suffix = '';
//     let trailingPunctuation = '';

//     // First, check for trailing punctuation
//     const punctuationMatch = word.match(/^(.+?)([.,!?;:]+)?$/);
//     if (punctuationMatch) {
//         baseWord = punctuationMatch[1];
//         trailingPunctuation = punctuationMatch[2] || '';
//     }

//     // Then check for possessive or contraction suffixes
//     const suffixMatch = baseWord.match(/^(.+?)(['']s|s[''])$/i);
//     if (suffixMatch) {
//         baseWord = suffixMatch[1];
//         suffix = suffixMatch[2];
//     }

//     // Map adjusted word back to original word using the pronunciation dictionary
//     const dictEntry = pronunciationDictionary.find(entry =>
//         (entry.alt && baseWord.toLowerCase() === entry.alt.toLowerCase()) ||
//         (entry.word && baseWord.toLowerCase() === entry.word.toLowerCase())
//     );

//     if (dictEntry) {
//         word = dictEntry.word + suffix + trailingPunctuation;
//     } else {
//         word = baseWord + suffix + trailingPunctuation;
//     }

//     // Handle special cases if needed
//     if (word.toUpperCase() === 'YOUR') {
//         word = 'your';
//     } else if (word.toUpperCase() === 'PERFECT') {
//         word = 'perfect';
//     }

//     words.push({
//         word: word,
//         start_time: startTime + offset,
//         end_time: endTime + offset
//     });
// }