import { CoachFeedback, EmailCoachRule } from "../types";
import { getPlainTextFromHtml, extractSentences } from "../utils";

// Spam trigger words and phrases, grouped by category so that feedback can be
// worded (and given a severity) per category.
//
// - Each key is a category name; analyzeSpamWords switches on it to choose the
//   feedback message and severity.
// - Entries are written in lowercase; analyzeSpamWords matches them against a
//   lowercased copy of each sentence.
// - analyzeSpamWords skips entries shorter than three characters, so "4u" and
//   "#1" are listed here but are not currently reported.
// - NOTE(review): a few phrases are listed under two categories
//   ("avoid bankruptcy", "you have been selected", "don't hesitate",
//   "increase sales") - confirm whether that is intended.
const SPAM_WORDS = {
	// Phrases typical of suspicious or scam emails; the only category that
	// analyzeSpamWords reports with "warning" severity.
	shady: [
		"0 down",
		"all natural",
		"as seen on",
		"at no cost",
		"auto email removal",
		"avoid bankruptcy",
		"bulk email",
		"cable converter",
		"calling creditors",
		"cancel at any time",
		"cannot be combined",
		"casino",
		"celebrity",
		"cell phone cancer scam",
		"certified",
		"cheap",
		"cheap meds",
		"cialis",
		"clearance",
		"collect",
		"compare",
		"compare rates",
		"confidentiality",
		"congratulations",
		"consolidate debt",
		"copy dvds",
		"covid",
		"credit bureaus",
		"cures baldness",
		"direct email",
		"eliminate debt",
		"explode your business",
		"fast viagra delivery",
		"finance",
		"financial advice",
		"financial independence",
		"free access",
		"free bonus",
		"free cell phone",
		"free dvd",
		"free grant money",
		"free installation",
		"free offer",
		"free priority mail",
		"free sample",
		"free website",
		"gift card",
		"gift certificate",
		"guaranteed income",
		"guaranteed payment",
		"hidden assets",
		"hidden charges",
		"hidden costs",
		"hidden fees",
		"home based business",
		"human growth hormone",
		"instant weight loss",
		"internet marketing",
		"investment decision",
		"laser printer",
		"last day",
		"legal notice",
		"lifetime access",
		"lifetime deal",
		"limited time offer",
		"limited time only",
		"lowest insurance rates",
		"lowest interest rate",
		"luxury car",
		"meet singles",
		"miracle",
		"mlm",
		"multi-level marketing",
		"nigerian",
		"no credit check",
		"no experience",
		"not spam",
		"offshore",
		"online biz opportunity",
		"online degree",
		"online pharmacy",
		"passwords",
		"per day",
		"per week",
		"per year",
		"presently",
		"priority mail",
		"prizes",
		"profit",
		"pure profits",
		"refinance home",
		"removes wrinkles",
		"replica watches",
		"requires investment",
		"reverses aging",
		"risk free",
		"rolex",
		"save big",
		"search engine listings",
		"search engine optimisation",
		"serious offer",
		"spam free",
		"special offer",
		"stainless steel",
		"stock alert",
		"stock pick",
		"stop snoring",
		"strong buy",
		"stuff on sale",
		"subject to credit",
		"take action now",
		"this isn't spam",
		"timeshare",
		"undisclosed recipient",
		"university diplomas",
		"unsecured credit",
		"unsolicited",
		"vacation offers",
		"valium",
		"viagra",
		"vicodin",
		"warranty expired",
		"we hate spam",
		"weight loss",
		"winner",
		"winning",
		"xanax",
		"xxx",
		"you have been selected",
		"zero risk",
	],
	// Phrases that pressure the reader to act right away; reported with "info"
	// severity as creating artificial urgency.
	urgency: [
		"act now",
		"act immediately",
		"apply now",
		"before it's too late",
		"buy now",
		"call now",
		"call free",
		"click here",
		"click now",
		"contact us immediately",
		"deal ending soon",
		"do it now",
		"don't delete",
		"don't hesitate",
		"expires today",
		"final call",
		"for instant access",
		"get it now",
		"get started now",
		"hurry up",
		"immediately",
		"info you requested",
		"instant",
		"limited time",
		"now only",
		"offer expires",
		"once in lifetime",
		"order now",
		"order today",
		"please read",
		"purchase now",
		"sign up free",
		"supplies are limited",
		"take action",
		"this won't last",
		"time limited",
		"today",
		"top urgent",
		"urgent",
		"what are you waiting for",
		"while supplies last",
	],
	// Phrases reported with "info" severity as often used in spam offering
	// cheap deals.
	cheapOffers: [
		"4u",
		"acceptance",
		"access",
		"avoid bankruptcy",
		"being a member",
		"card accepted",
		"cards accepted",
		"cents on the dollar",
		"credit card offers",
		"pre-approved",
		"save $",
		"success",
		"you are a winner",
		"you have been selected",
		"you're a winner",
	],
	// Overblown claims; reported with "info" severity as sounding exaggerated
	// and possibly reducing credibility.
	exaggerated: [
		"#1",
		"100% free",
		"100% satisfied",
		"additional income",
		"amazed",
		"amazing",
		"amazing offer",
		"be your own boss",
		"best bargain",
		"best deal",
		"best price",
		"big bucks",
		"billion",
		"billion dollars",
		"bonus",
		"cash bonus",
		"cashcashcash",
		"double your income",
		"drastically reduced",
		"earn $",
		"earn extra cash",
		"earn money",
		"eliminate bad credit",
		"extra cash",
		"extra income",
		"fantastic deal",
		"fast cash",
		"financial freedom",
		"free gift",
		"free money",
		"free trial",
		"full refund",
		"get out of debt",
		"get paid",
		"giveaway",
		"guaranteed",
		"income from home",
		"increase sales",
		"incredible deal",
		"join millions",
		"lowest price",
		"make money",
		"million dollars",
		"money back",
		"money making",
		"no gimmicks",
		"once in a lifetime",
		"one hundred percent guaranteed",
		"potential earnings",
		"prize",
		"pure profit",
		"risk-free",
		"satisfaction guaranteed",
		"save big money",
		"serious cash",
		"special promotion",
		"unbeatable offer",
		"unlimited",
		"why pay more",
	],
	// Money and sales vocabulary; reported with "info" severity as common in
	// sales emails.
	salesMarketing: [
		"$$$",
		"£££",
		"€€€",
		"affordable",
		"bad credit",
		"bankruptcy",
		"billionaire",
		"cash out",
		"claim your discount",
		"credit card",
		"dollars",
		"don't hesitate",
		"double your wealth",
		"earn extra income",
		"earn from home",
		"for just $",
		"get money",
		"huge discount",
		"increase revenue",
		"increase sales",
		"instant earnings",
		"investment advice",
		"money-back guarantee",
		"money-making",
		"monthly payment",
		"opt in",
		"price protection",
		"us dollars",
	],
};

/** Severity levels this rule attaches to a feedback item. */
type SpamSeverity = "info" | "warning" | "error";

/** How matches from one spam category are presented to the user. */
interface SpamCategoryStyle {
	severity: SpamSeverity;
	/** Builds the user-facing message for the matched (lowercased) text. */
	describe: (matchedText: string) => string;
}

/** A spam term with everything that can be computed once, ahead of matching. */
interface CompiledSpamTerm {
	category: string;
	/** The term with every non-alphanumeric character replaced by "-". */
	safeId: string;
	pattern: RegExp;
	style: SpamCategoryStyle;
}

// Terms shorter than this are skipped to avoid false positives while typing.
const MIN_SPAM_TERM_LENGTH = 3;

// Message and severity per category of SPAM_WORDS.
const SPAM_CATEGORY_STYLES = new Map<string, SpamCategoryStyle>([
	[
		"shady",
		{
			severity: "warning",
			describe: text => `'${text}' might trigger spam filters - it's commonly used in suspicious emails.`,
		},
	],
	[
		"urgency",
		{
			severity: "info",
			describe: text => `'${text}' creates artificial urgency that may trigger spam filters.`,
		},
	],
	[
		"cheapOffers",
		{
			severity: "info",
			describe: text => `'${text}' is often used in spam offering cheap deals.`,
		},
	],
	[
		"exaggerated",
		{
			severity: "info",
			describe: text => `'${text}' sounds exaggerated and may reduce credibility.`,
		},
	],
	[
		"salesMarketing",
		{
			severity: "info",
			describe: text => `'${text}' is common in sales emails and might trigger filters.`,
		},
	],
]);

// Used for any category that has no dedicated entry above.
const DEFAULT_SPAM_STYLE: SpamCategoryStyle = {
	severity: "info",
	describe: text => `'${text}' is a common spam trigger word.`,
};

/**
 * Builds the matching pattern for a single spam term.
 *
 * `\b` only holds at a transition between a word character and a non-word
 * character. Wrapping a term such as "$$$" or "save $" in `\b…\b` would
 * therefore require it to be glued to letters or digits, so it would never
 * match in ordinary text. A boundary is added only on a side where the term
 * itself begins or ends with a word character.
 */
const buildSpamTermPattern = (term: string): RegExp => {
	// Escape regex metacharacters so the term is matched literally.
	const escapedTerm = term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
	const leadingBoundary = /^\w/.test(term) ? "\\b" : "";
	const trailingBoundary = /\w$/.test(term) ? "\\b" : "";
	return new RegExp(`${leadingBoundary}${escapedTerm}${trailingBoundary}`, "gi");
};

// Lazily built list of compiled terms, in SPAM_WORDS category/list order.
let compiledSpamTerms: CompiledSpamTerm[] | undefined;

/** Compiles every eligible term of SPAM_WORDS once and reuses the result. */
const getCompiledSpamTerms = (): CompiledSpamTerm[] => {
	if (compiledSpamTerms === undefined) {
		compiledSpamTerms = Object.entries(SPAM_WORDS).flatMap(([category, terms]) => {
			const style = SPAM_CATEGORY_STYLES.get(category) ?? DEFAULT_SPAM_STYLE;
			return terms
				.filter(term => term.length >= MIN_SPAM_TERM_LENGTH)
				.map(term => ({
					category,
					safeId: term.replace(/[^a-z0-9]/gi, "-").toLowerCase(),
					pattern: buildSpamTermPattern(term),
					style,
				}));
		});
	}
	return compiledSpamTerms;
};

/**
 * Scans text for spam trigger words and phrases.
 *
 * The content is passed through `getPlainTextFromHtml`, split with
 * `extractSentences`, and every sentence is matched case-insensitively against
 * the terms in `SPAM_WORDS`. One feedback item is produced per occurrence,
 * ordered by sentence, then category, then term.
 *
 * @param content - The text to analyze (presumably the HTML body of an email;
 *   confirm against callers).
 * @returns One feedback item per match. `description` holds the sentence that
 *   contains the match. `id` is `spam-<category>-<sanitized term>` and is NOT
 *   unique: repeated occurrences of a term share an id, as do terms that
 *   sanitize to the same string (e.g. "$$$" and "€€€").
 */
const analyzeSpamWords = (content: string): CoachFeedback[] => {
	const plainText = getPlainTextFromHtml(content);
	const { sentences } = extractSentences(plainText);
	const terms = getCompiledSpamTerms();

	const feedback: CoachFeedback[] = [];

	for (const sentence of sentences) {
		// Matching runs on a lowercased copy, so reported text is lowercase too.
		const lowerSentence = sentence.toLowerCase();
		// A sentence that does not end in punctuation or whitespace may still be
		// in the middle of being typed.
		const endsWithTerminator = /[.!?,;:\s]$/.test(sentence);

		for (const { category, safeId, pattern, style } of terms) {
			// matchAll works on a copy of the pattern, so the cached RegExp's
			// lastIndex is never advanced and it is safe to reuse across calls.
			for (const match of lowerSentence.matchAll(pattern)) {
				const matchedText = match[0];
				const matchEndPos = (match.index ?? 0) + matchedText.length;
				const isAtEndOfSentence = matchEndPos >= lowerSentence.length - 2;

				// Skip short matches at the very end of an unterminated sentence:
				// they may be the start of a longer word still being typed.
				if (isAtEndOfSentence && !endsWithTerminator && matchedText.length < 5) continue;

				feedback.push({
					id: `spam-${category}-${safeId}`,
					message: style.describe(matchedText),
					severity: style.severity,
					description: sentence,
				});
			}
		}
	}

	return feedback;
};

/**
 * Email coach rule that reports words and phrases commonly found in spam.
 *
 * `analyze` forwards the content to `analyzeSpamWords` and returns its
 * feedback list unchanged.
 */
export const spamWordsRule: EmailCoachRule = {
	id: "spam-words",
	name: "Spam Word Detection",
	description: "Identifies words and phrases commonly found in spam that might trigger filters",
	analyze(content: string) {
		return analyzeSpamWords(content);
	},
};
