/**
 * Utility functions that are safe for use in the browser
 *
 * @author: Chris Moyer <cmoyer@newstex.com>
 */
import type { Story } from '@newstex/types';

import { utcFormat } from './date';
import type { Logger } from './logger';

/**
 * Strip non-printable control characters from a value and trim the result.
 *
 * Characters removed:
 * - \x00-\x08: NULL (0) through BACKSPACE (8)
 * - \x0B-\x0C: VERTICAL TAB (11) and FORM FEED (12)
 * - \x0E-\x1F: SHIFT OUT (14) through INFORMATION SEPARATOR ONE (31)
 * - \x7F: DELETE (127)
 *
 * TAB (\x09), LINE FEED (\x0A) and CARRIAGE RETURN (\x0D) are not in the
 * character class, so they are only removed when `trim()` strips them from
 * either end.
 *
 * @param str String to clean
 * @returns The cleaned, trimmed string, or an empty string for any falsy input
 */
export function cleanString(str: string): string {
	if (str) {
		// eslint-disable-next-line no-control-regex
		const controlChars = /[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]/g;
		// String() coerces truthy non-string values that slip past the type checker
		return String(str).replace(controlChars, '').trim();
	}
	return '';
}

/**
 * Trim a string to no more than 1024 characters (to fit into SDB's limits).
 *
 * Values of 1000 characters or fewer, including null, undefined and the empty
 * string, are returned unchanged. Longer strings are cut to 1000 characters,
 * backed up to the previous full word when a word boundary exists, and
 * suffixed with "...", so the result is at most 1003 characters long.
 *
 * @param str String to trim
 * @returns The original value, or the shortened string ending in "..."
 */
export function trimString(str: string | null | undefined): string | null | undefined {
	if (!str || str.length <= 1000) {
		return str;
	}
	let shortened = str.slice(0, 1000);
	const lastSpace = shortened.lastIndexOf(' ');
	// Only back up when there is a word boundary past the start. lastIndexOf
	// returns -1 when no space exists, and slicing to -1 would silently drop the
	// final character; a lone space at index 0 would leave nothing but "...".
	if (lastSpace > 0) {
		shortened = shortened.slice(0, lastSpace);
	}
	return `${shortened}...`;
}

/**
 * Resolve the storage path for a story, optionally pointing at a file within it.
 *
 * When the story has no `path` yet, one is built as
 * `<prefix>/<YYYY/MM of received_at>/<__id__>` and stored back onto the story
 * object. The prefix is `stories`, unless the ID starts with `STOK-` or
 * `AMCN-`, in which case the part of the ID before the first dash is used.
 *
 * @param story Story whose path is wanted; its `path` is set if it was missing
 * @param filename Optional file name appended to the path after a slash
 * @returns The story path, with `/<filename>` appended when a filename is given
 */
export function getStoryPath(story: Partial<Story>, filename?: string) {
	if (!story.path) {
		// Numeric values are unix timestamps (seconds); scale them to JS milliseconds
		const timestamp = typeof story.received_at === 'number'
			? story.received_at * 1000
			: story.received_at;
		const hasOwnPrefix = ['STOK-', 'AMCN-'].some((tag) => story.__id__.startsWith(tag));
		const root = hasOwnPrefix ? story.__id__.split('-')[0] : 'stories';
		story.path = `${root}/${utcFormat('YYYY/MM', timestamp)}/${story.__id__}`;
	}

	return filename ? `${story.path}/${filename}` : story.path;
}

/**
 * Extract just the normalized domain name from a full URL.
 *
 * The URL is lowercased and parsed, and a leading `www.` is removed from its
 * hostname. Other subdomains are left in place.
 *
 * @param url Full URL
 * @param logger Optional logger; receives an error entry when the URL cannot be parsed
 * @returns The hostname without a leading `www.`, or an empty string when the
 *          URL is empty or cannot be parsed
 */
export function getDomainName(url: string, logger?: Logger): string {
	if (url) {
		try {
			const { hostname } = new URL(url.toLowerCase());
			return hostname.replace(/^www\./, '');
		} catch {
			// Parsing failed: report the URL as it was passed in, then fall through
			logger?.error({
				message: 'Error getting domain name',
				url,
			});
		}
	}
	return '';
}

/**
 * Build a sort-friendly version of a name by moving a leading English article
 * ("The", "A", "An") to the end, e.g. "The Beatles" becomes "Beatles, The".
 *
 * The article is matched case-insensitively and must be followed by a space;
 * its original casing is kept in the result. Names without a leading article
 * are returned trimmed but otherwise unchanged.
 *
 * @param name Name to convert
 * @returns The sortable name, or an empty string for any falsy input
 */
export function getSortableName(name: string): string {
	// Tolerate null/undefined/empty input like the other helpers in this module
	if (!name) {
		return '';
	}
	const trimmed = name.trim();
	const upperName = trimmed.toUpperCase();
	// Each article includes its trailing space so that e.g. "Theory" is not matched
	for (const article of ['THE ', 'A ', 'AN ']) {
		if (upperName.startsWith(article)) {
			const leading = trimmed.substring(0, article.length).trim();
			const rest = trimmed.substring(article.length).trim();
			return `${rest}, ${leading}`;
		}
	}
	return trimmed;
}

/**
 * Score how well two strings match by sliding one across the other and finding
 * the longest run of consecutive characters that line up and are equal.
 *
 * The comparison is case-insensitive, and falsy inputs are treated as empty
 * strings.
 *
 * @param str1 First string to compare
 * @param str2 Second string to compare
 * @returns Number of matching characters in the best matching substring
 */
export function calculateMatchScore(str1: string, str2: string): number {
	const left = (str1 || '').toLowerCase();
	const right = (str2 || '').toLowerCase();
	let best = 0;

	// Each shift lines left[i] up with right[i + shift]; cover every overlap
	for (let shift = 1 - left.length; shift < right.length; shift++) {
		// Indices of `left` that have a counterpart in `right` at this shift
		const first = Math.max(0, -shift);
		const end = Math.min(left.length, right.length - shift);
		let run = 0;

		for (let i = first; i < end; i++) {
			if (left[i] === right[i + shift]) {
				run++;
				best = Math.max(best, run);
			} else {
				// A mismatch ends the current run of matching characters
				run = 0;
			}
		}
	}

	return best;
}
