import { cosineSimilarity } from '@newstex/core/math';
import { KnowledgeBase } from '@newstex/types/rag';

/**
 * Scores how similar two text strings are using a bag-of-words comparison.
 * This is used for RAG reranking to improve search result relevance.
 *
 * Both texts are Unicode-normalized (NFKC), lowercased, stripped of punctuation
 * and split on whitespace. The resulting word-frequency vectors are then passed
 * to `cosineSimilarity`. Word order, letter case and punctuation therefore do
 * not affect the score.
 *
 * Letters, combining marks and digits from any script are kept as part of a
 * word (not only ASCII), so accented and non-Latin text is compared on its
 * actual content.
 *
 * @param text1 - The first text string to compare
 * @param text2 - The second text string to compare
 * @returns The cosine similarity of the two word-frequency vectors (expected to
 * be between 0 and 1 for these non-negative vectors), or 0 when either text is
 * empty or contains no words after normalization
 */
export function getTextSimilarity(text1: string, text2: string): number {
	if (!text1 || !text2) {
		return 0;
	}

	// Split a text into normalized word tokens. The `u` flag with property escapes
	// keeps non-ASCII letters/digits that a plain `\w` would strip, and the filter
	// drops the empty token that splitting an empty or whitespace-only string yields.
	const tokenize = (text: string): string[] => text
		.normalize('NFKC')
		.toLowerCase()
		.replace(/[^\p{L}\p{M}\p{N}_\s]/gu, '') // Remove punctuation and symbols
		.split(/\s+/)
		.filter((token) => token.length > 0);

	const words1 = tokenize(text1);
	const words2 = tokenize(text2);

	// A text with no words has nothing in common with anything; without this guard
	// two punctuation-only texts would be scored as identical.
	if (words1.length === 0 || words2.length === 0) {
		return 0;
	}

	// Assign every distinct word (across both texts) a position in the vectors
	const wordToIndex = new Map<string, number>();
	for (const word of [...words1, ...words2]) {
		if (!wordToIndex.has(word)) {
			wordToIndex.set(word, wordToIndex.size);
		}
	}

	// Build the word-frequency vector for one token list
	const toFrequencyVector = (words: string[]): number[] => {
		const vector = new Array<number>(wordToIndex.size).fill(0);
		for (const word of words) {
			const index = wordToIndex.get(word);
			if (index !== undefined) {
				vector[index] = (vector[index] ?? 0) + 1;
			}
		}
		return vector;
	};

	return cosineSimilarity(toFrequencyVector(words1), toFrequencyVector(words2));
}

/**
 * Orders documents from most to least similar to a query.
 *
 * A document's score is the best `getTextSimilarity` match between the query
 * and either the document title or any one of its questions. The input array
 * is left untouched; a new array is returned.
 *
 * @param query - The search query
 * @param documents - Array of KnowledgeBase documents
 * @returns A new array holding the same documents, highest score first
 */
export function rerank(
	query: string,
	documents: KnowledgeBase[],
): KnowledgeBase[] {
	// Best similarity for one document: start from the title, then let any
	// better-matching question raise it
	const bestMatch = (doc: KnowledgeBase): number => (doc.questions || []).reduce(
		(best, question) => Math.max(best, getTextSimilarity(query, question)),
		getTextSimilarity(query, doc.title),
	);

	// Pair every document with its score so the sort does not recompute it
	const ranked = documents.map((doc) => ({ doc, score: bestMatch(doc) }));

	// Highest score first
	ranked.sort((left, right) => right.score - left.score);

	return ranked.map((entry) => entry.doc);
}
