import { z } from 'zod';

import { LegacyBaseObjectSchema } from './base';
import type { Category } from './category';
import type { Publication } from './content';
import { UnixTimestampSchema } from './dates';
import { FeedSchema } from './feed';
import type { Product } from './product';
import type { ReferenceString } from './reference';
import type { Ticker } from './ticker';

/**
 * Statistics describing a story body.
 *
 * The three counts are required numbers; `sentiment_score` may be omitted.
 */
export const StoryStatsSchema = z.object({
	character_count: z.number().describe('Number of characters in the story body'),
	line_count: z.number().describe('Number of lines in the story body'),
	word_count: z.number().describe('Number of words in the story body'),
	sentiment_score: z.number().optional().describe('Sentiment analysis score for the story'),
});

/** Static type inferred from `StoryStatsSchema`. */
export type StoryStats = z.infer<typeof StoryStatsSchema>;

/**
 * A single story (piece of content)
 *
 * Input is preprocessed before validation: when the raw value carries
 * `__type__ === 'Story'` but has no truthy `$type`, a shallow copy with
 * `$type: 'Story'` is validated instead. Any other input is passed through
 * untouched. Validation then runs against `LegacyBaseObjectSchema` extended
 * with the story fields below; only the two type tags and `headline` are
 * required at runtime.
 *
 * NOTE(review): several reference fields place `.optional()` inside the
 * `as z.ZodType<...>` cast, so the inferred static type may not include
 * `undefined` even though the runtime schema accepts a missing value —
 * confirm against the `ReferenceString` definition before relying on it.
 *
 * TableName: Story
 * @author: Chris Moyer <cmoyer@newstex.com>
 */
export const StorySchema = z.preprocess(
	// Back-fill `$type` from the legacy `__type__` tag when it is missing.
	(raw: any) => (
		raw?.__type__ === 'Story' && !raw.$type
			? { ...raw, $type: 'Story' }
			: raw
	),
	LegacyBaseObjectSchema.extend({
		// --- Type tags ---
		// TODO: Remove `__type__` once we can migrate all Story objects to the new format
		// (or find a better way to handle normalizations)
		__type__: z.literal('Story'),
		$type: z.literal('Story'),

		// --- Body and presentation ---
		headline: z.string().describe('Headline/title of the story'),
		normalized_headline:
			z.string().optional().describe('Normalized headline (for searching purposes)'),
		permalink: z.string().optional().describe('URL to the story'),
		excerpt: z.string().optional().describe('Short summary'),
		text_content: z.string().optional().describe('Full text-version of the story'),
		html_content: z.string().optional().describe('HTML version of the story'),
		language: z.string().optional().describe('Language this story is written in'),
		thumb_url:
			z.string().optional().describe('Optional URL to a thumbnail image for this story'),
		content_type: z.string().optional().describe('Media type of the story'),
		video_url: z.string().optional().describe('URL to video content'),

		// --- Provenance ---
		source: z.string().optional().describe('Author'),
		pub_type: z.string().optional().describe('Publication Type'),
		publication:
			(z.string().optional() as z.ZodType<ReferenceString<Publication>>)
				.describe('ID of the Publication that authored this story'),
		content:
			z.array(z.string()).optional()
				.describe('Newstex IDs of all content associated with this story'),
		external_id:
			z.string().optional().describe('Publication-provided unique ID for this story'),

		// --- Classification ---
		products:
			(z.array(z.string()).optional() as z.ZodType<ReferenceString<Product>[]>)
				.describe('Products'),
		provider_tickers:
			(z.array(z.string()).optional() as z.ZodType<ReferenceString<Ticker>[]>)
				.describe('Tickers from the publisher'),
		company_tickers:
			(z.array(z.string()).optional() as z.ZodType<ReferenceString<Ticker>[]>)
				.describe('Tickers we identified'),
		provider_categories:
			z.array(z.string()).optional().describe('Publication-provided categories'),
		categories:
			(z.array(z.string()).optional() as z.ZodType<ReferenceString<Category>[]>)
				.describe('Newstex identified categories'),
		category_scores:
			z.record(z.string(), z.number()).optional()
				.describe('Scores for each category with matching markers'),

		// --- Lifecycle ---
		version: z.number().optional().describe('Version number'),
		status:
			z.enum(['Standby', 'Active', 'Removed']).optional()
				.describe('Active if this is a live story'),

		// --- Timestamps ---
		date: UnixTimestampSchema.optional().describe('Publication date'),
		updated: UnixTimestampSchema.optional().describe('Publication updated date'),
		created_at: UnixTimestampSchema.optional().describe('Date created'),
		modified_at: UnixTimestampSchema.optional().describe('Date Modified'),
		// Unlike the other timestamps this is a string: `YYYY-MM-DDTHH:MM:SSZ`, whole seconds only.
		received_at:
			z.string().regex(/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$/).optional()
				.describe('Date Received'),
		sys_modstamp:
			UnixTimestampSchema.optional()
				.describe('Last time this record was updated by ANYTHING'),

		// --- Ingestion and analysis ---
		original_hash:
			z.string().optional()
				.describe('MD5 Hash of the raw story (used for update detection)'),
		// Either a plain string or a full object matching FeedSchema.
		feed:
			z.union([z.string(), FeedSchema]).optional()
				.describe('Feed this was generated from'),
		ai_scores:
			z.record(z.string(), z.number()).optional().describe('AI Qualification scores'),
		path: z.string().optional().describe('S3 path to store files for this story'),
		stats: StoryStatsSchema.optional().describe('Story body stats'),
		embedding: z.array(z.number()).optional().describe('AI Embedding for the story'),

		// --- Search joins ---
		// Accepts any value at runtime (`z.any()`); only the static type is narrowed.
		Publication:
			z.lazy(() => z.any().optional() as z.ZodType<Partial<Publication>>)
				.describe('Joined fields from Searches in TypeSense'),
	}),
);

/** Static type inferred from `StorySchema`. */
export type Story = z.infer<typeof StorySchema>;

/**
 * Shape of the `s3` field on a story reference: the bucket and key of the stored story.
 */
const StoryS3LocationSchema = z.object({
	bucket: z.string().describe('S3 bucket containing the story'),
	key: z.string().describe('S3 key for the story'),
});

/**
 * Reference to a story stored in S3
 *
 * Carries the `__type__` tag, the headline and the S3 location; `$type`
 * and `__id__` are optional. Before validation, an input tagged
 * `__type__ === 'Story'` with no truthy `$type` is shallow-copied with
 * `$type: 'Story'` added; every other input is validated as given.
 */
export const StoryReferenceSchema = z.preprocess(
	(raw: any) => {
		// Nothing to normalize unless this is a Story that still lacks `$type`.
		if (raw?.__type__ !== 'Story' || raw.$type) {
			return raw;
		}
		return { ...raw, $type: 'Story' };
	},
	z.object({
		__type__: z.literal('Story'),
		$type: z.literal('Story').optional(),
		__id__: z.string().optional(),
		headline: z.string(),
		s3: StoryS3LocationSchema,
	}),
);

/** Static type inferred from `StoryReferenceSchema`. */
export type StoryReference = z.infer<typeof StoryReferenceSchema>;

/**
 * Type guard reporting whether `obj` should be treated as a Story.
 *
 * This is a lightweight tag check, not a schema validation: no field other
 * than `__type__` is inspected.
 *
 * @param obj - Candidate value. Anything that is not a non-null object is
 *   rejected, so the guard never narrows `null`, `undefined` or a primitive
 *   to `Story`.
 * @param tableName - Optional name of the table the record was read from.
 *   When it equals 'Story' the object is accepted even if it has no
 *   matching `__type__` tag.
 * @returns `true` when `obj` is an object and either its `__type__` is
 *   'Story' or `tableName` is 'Story'.
 */
export function isStory(obj: any, tableName?: string): obj is Story {
	// A table-name hint alone must not turn a missing record into a Story.
	if (typeof obj !== 'object' || obj === null) {
		return false;
	}
	return obj.__type__ === 'Story' || tableName === 'Story';
}
