import { z } from 'zod';

import { UnixTimestampSchema } from './dates';
import { ErrorResponse } from './error';
import { HubSpotCompany } from './hubspot/company';

/**
 * Shape shared by a media item's main content and its thumbnail.
 *
 * Every field is optional. Note that `width` and `height` are validated as
 * strings, not numbers.
 */
const MediaContentSchema = z.object({
	url: z.string().url().optional().describe('URL of the media content'),
	type: z.string().optional().describe('MIME type of the media content'),
	width: z.string().optional().describe('Width of the media content'),
	height: z.string().optional().describe('Height of the media content'),
}).describe('Media content or thumbnail details');

/**
 * A media item (attachment, video, etc.) attached to a feed story.
 *
 * `content` and `thumbnail` both reuse the same media-content shape; all
 * fields are optional.
 */
export const FeedMediaSchema = z.object({
	title: z.string().optional().describe('Title of the media item'),
	content: MediaContentSchema.optional().describe('Main media content details'),
	thumbnail: MediaContentSchema.optional().describe('Thumbnail image details'),
	description: z.string().optional().describe('Description of the media item'),
}).describe('Media item attached to a feed');

/**
 * A single story as it appears in a feed.
 *
 * All fields are optional. `date` and `updated` are validated with
 * `UnixTimestampSchema`, and `media` is a list of `FeedMediaSchema` items.
 */
export const FeedItemSchema = z.object({
	title: z.string().optional().describe('Title of the story'),
	date: UnixTimestampSchema.optional().describe('Publication date'),
	updated: UnixTimestampSchema.optional().describe('Last update date'),
	link: z.string().url().optional().describe('URL of the story'),
	guid: z.string().optional().describe('Globally unique identifier'),
	author: z.string().optional().describe('Author of the story'),
	summary: z.string().optional().describe('Brief summary of the story'),
	description: z.string().optional().describe('Full description or excerpt'),
	fulltext: z.string().optional().describe('Complete story content'),
	categories: z.array(z.string()).optional().describe('Story categories'),
	tickers: z.array(z.string()).optional().describe('Stock tickers mentioned'),
	language: z.string().optional().describe('Language of the story'),
	media: z.array(FeedMediaSchema).optional().describe('Media items attached to the story'),
	is_current_story: z.boolean().optional()
		.describe('If true, this is the story for the page you are currently on'),
	version: z.number().int().optional().describe('Version of the story'),
	remove: z.boolean().optional().describe('If true, this story should not be included in the feed'),
}).describe('A single story item from a feed');

/**
 * Plain object schema for a feed detection request, without the
 * "url or feed_url" refinement.
 *
 * Kept separate so that its `.shape` can be spread into other object schemas
 * in this file.
 */
const DetectFeedRequestBaseSchema = z.object({
	id: z.string().optional().describe('Unique ID for this request'),
	url: z.string().url().optional().describe('URL of the page to run detection on'),
	feed_url: z.string().url().optional().describe('Feed URL'),
	hubspot_id: z.string().optional().describe('ID of the Hubspot record'),
	classify: z.boolean().optional().describe('Set to true to run the classification engine'),
	meta: z.record(z.string()).optional().describe('Meta tag data from extension'),
	ld_meta: z.array(z.any()).optional().describe('LD JSON metadata'),
	html_content: z.string().optional().describe('HTML content from extension'),
	// When `s3` is supplied, both `bucket` and `prefix` are required.
	s3: z.object({
		bucket: z.string(),
		prefix: z.string(),
	}).optional().describe('S3 location for response parts'),
	bypass_robots: z.boolean().optional().describe('Allow skipping robots lookup'),
	limit: z.number().int().positive().optional().describe('Limit number of articles to return'),
});

/**
 * Feed detection request: the base request fields plus the rule that at
 * least one of `url` / `feed_url` is present.
 */
export const DetectFeedRequestSchema = DetectFeedRequestBaseSchema.refine(
	// Reject only when both locations are missing.
	(request) => !(request.url === undefined && request.feed_url === undefined),
	{ message: 'Either url or feed_url must be provided' },
).describe('Feed detection request parameters');

/**
 * Aggregate statistics over the articles detected in a feed.
 *
 * All counters are optional integers except `average_word_count`, which may
 * be fractional.
 */
export const DetectedStatsSchema = z.object({
	total_articles: z.number().int().optional().describe('Total number of articles found'),
	total_words: z.number().int().optional().describe('Total word count across all articles'),
	average_word_count: z.number().optional().describe('Average words per article'),
	full_text_articles: z.number().int().optional().describe('Number of articles with full text'),
	qualified_articles: z.number().int().optional().describe('Number of articles meeting quality criteria'),
}).describe('Statistics about detected articles');

/**
 * A feed item extended with detection/analysis results.
 *
 * Inherits every `FeedItemSchema` field and adds an optional `$type` tag,
 * headline and category scores, per-article text statistics and a full-text
 * flag.
 */
export const DetectedArticleSchema = FeedItemSchema.extend({
	$type: z.literal('DetectedArticle').optional(),
	headline_score: z.number().optional().describe('Overall headline quality score'),
	headline_scores: z.record(z.number()).optional().describe('Headline scores by client'),
	newstex_categories: z.record(z.number()).optional().describe('Category scores'),
	// When `stats` is supplied, all three integer counters are required.
	stats: z.object({
		character_count: z.number().int(),
		line_count: z.number().int(),
		word_count: z.number().int(),
	}).optional().describe('Article statistics'),
	is_full_text: z.boolean().optional().describe('Whether article contains full text'),
}).describe('A detected and analyzed article');

/**
 * Feed detection response.
 *
 * Carries every field of the request base schema (its shape is spread in
 * first) followed by the detection results. As with the request, at least one
 * of `url` / `feed_url` must be present.
 */
export const DetectFeedResponseSchema = z.object({
	// Request fields first, so the response repeats them.
	...DetectFeedRequestBaseSchema.shape,

	// Feed identity and content
	possible_feed_urls: z.array(z.string().url()).optional()
		.describe('Possible feed URLs in order of preference'),
	name: z.string().optional().describe('Feed or website title'),
	description: z.string().optional().describe('Feed or website description'),
	authors: z.array(z.string()).optional().describe('List of authors'),
	articles: z.array(DetectedArticleSchema).optional().describe('Detected articles'),
	categories: z.array(z.string()).optional().describe('Feed categories'),
	last_post_date: z.string().optional().describe('Date of most recent post'),
	first_post_date: z.string().optional().describe('Date of oldest post'),
	raw_headers: z.record(z.string()).optional().describe('Raw HTTP headers'),

	// Scoring and qualification
	headline_score: z.number().optional().describe('Overall headline quality score'),
	headline_scores: z.record(z.number()).optional().describe('Headline scores by client'),
	newstex_categories: z.record(z.number()).optional().describe('Category scores'),
	publications: z.array(z.any()).optional().describe('Related publications'),
	stats: DetectedStatsSchema.optional().describe('Feed statistics'),
	qualified: z.boolean().optional().describe('Whether feed meets quality criteria'),
	rejection_reasons: z.array(z.string()).optional().describe('Reasons for rejection if not qualified'),
	language: z.string().optional().describe('Feed language'),
	pubDate: UnixTimestampSchema.optional().describe('Feed publication date'),
	link: z.string().url().optional().describe('Feed link'),
	suggested_clients: z.array(z.string()).optional().describe('Clients who might be interested'),

	// Social profiles
	twitter: z.string().optional().describe('Twitter handle'),
	facebook: z.string().optional().describe('Facebook page'),
	linkedin: z.string().optional().describe('LinkedIn profile'),
	instagram: z.string().optional().describe('Instagram handle'),
	youtube: z.string().optional().describe('YouTube channel'),

	// Only typed as ErrorResponse; z.custom() without a check does no validation.
	feed_error: z.custom<ErrorResponse>().optional().describe('Feed processing error'),

	// HubSpot records: only `id` is validated; the remaining HubSpotCompany
	// fields are typed through the z.custom intersection.
	// NOTE(review): the `as any` spread contributes no keys (it spreads the shape
	// of an empty object schema); presumably it only loosens the inferred type —
	// confirm before removing.
	hubspot_records: z.array(
		z.object({
			...z.object({}).partial().shape as any,
			id: z.string(),
		}).and(z.custom<Partial<HubSpotCompany>>()),
	).optional().describe('Matching HubSpot records'),
	related_hubspot_records: z.array(
		z.object({
			...z.object({}).partial().shape as any,
			id: z.string(),
		}).and(z.custom<Partial<HubSpotCompany>>()),
	).optional().describe('Related HubSpot records'),

	newscore_id: z.string().optional().describe('NewsCore matching record ID'),
}).refine(
	// Reject only when both locations are missing.
	(response) => !(response.url === undefined && response.feed_url === undefined),
	{ message: 'Either url or feed_url must be provided' },
).describe('Feed detection response');

/**
 * Feed detection response whose articles are S3 references rather than
 * inline article bodies.
 *
 * The object is strict (unknown top-level keys are rejected) and at least one
 * of `url` / `feed_url` must be present.
 */
export const DetectedFeedWithArticleReferencesSchema = z.object({
	url: z.string().url().optional().describe('URL of the page to run detection on'),
	feed_url: z.string().url().optional().describe('Feed URL'),
	articles: z.array(
		z.preprocess(
			(raw: any) => {
				// Back-fill $type from __type__ when the input is tagged as a Story
				// but has no truthy $type of its own; anything else passes through.
				const needsType = raw && raw.__type__ === 'Story' && !raw.$type;
				return needsType ? { ...raw, $type: 'Story' } : raw;
			},
			z.object({
				__type__: z.literal('Story'),
				$type: z.literal('Story').optional(),
				__id__: z.string().optional(),
				headline: z.string(),
				name: z.string().optional(), // Keep the name field in the parsed output
				s3: z.object({
					bucket: z.string().describe('S3 bucket containing the story'),
					key: z.string().describe('S3 key for the story'),
				}),
			}),
		),
	).optional().describe('Articles as S3 references'),
}).strict().superRefine((feed, ctx) => {
	const hasLocation = feed.url !== undefined || feed.feed_url !== undefined;
	if (hasLocation) {
		return;
	}
	ctx.addIssue({
		code: z.ZodIssueCode.custom,
		message: 'Either url or feed_url must be provided',
	});
}).describe('Feed detection response with article references');

// Static TypeScript types inferred from the schemas above, so the types and
// the runtime validators cannot drift apart.

/** Inferred type of `FeedMediaSchema`. */
export type FeedMedia = z.infer<typeof FeedMediaSchema>;
/** Inferred type of `FeedItemSchema`. */
export type FeedItem = z.infer<typeof FeedItemSchema>;
/** Inferred type of `DetectFeedRequestSchema`. */
export type DetectFeedRequest = z.infer<typeof DetectFeedRequestSchema>;
/** Inferred type of `DetectedStatsSchema`. */
export type DetectedStats = z.infer<typeof DetectedStatsSchema>;
/** Inferred type of `DetectedArticleSchema`. */
export type DetectedArticle = z.infer<typeof DetectedArticleSchema>;
/** Inferred type of `DetectFeedResponseSchema`. */
export type DetectFeedResponse = z.infer<typeof DetectFeedResponseSchema>;
/** Inferred type of `DetectedFeedWithArticleReferencesSchema`. */
export type DetectedFeedWithArticleReferences = z.infer<typeof DetectedFeedWithArticleReferencesSchema>;

/**
 * Collect the IDs of categories whose score is strictly greater than 10.
 *
 * Feed-level `newstex_categories` take precedence: when that record is present
 * (even if it is empty or contains no qualifying score) the articles are not
 * consulted. Otherwise the `newstex_categories` of every article are scanned
 * and the qualifying IDs are returned de-duplicated, in first-seen order.
 *
 * @param detectResp - Feed detection response. A null/undefined value is
 *   tolerated and yields an empty list.
 * @returns Category IDs whose score exceeds the threshold.
 */
export function getMatchedCategories(detectResp: DetectFeedResponse): string[] {
	// A category matches only when its score is strictly above this value.
	const SCORE_THRESHOLD = 10;

	const feedScores = detectResp?.newstex_categories;
	if (feedScores) {
		return (Object.entries(feedScores) as [string, number][])
			.filter(([, score]) => score > SCORE_THRESHOLD)
			.map(([id]) => id);
	}

	// A Set de-duplicates in O(1) per ID and keeps insertion (first-seen) order.
	const matched = new Set<string>();
	for (const article of detectResp?.articles ?? []) {
		const articleScores = article.newstex_categories;
		if (!articleScores) {
			continue;
		}
		for (const [id, score] of Object.entries(articleScores) as [string, number][]) {
			if (score > SCORE_THRESHOLD) {
				matched.add(id);
			}
		}
	}
	return Array.from(matched);
}
