import type { VectorDocument } from "../types";

/**
 * Builds a short, single-line preview of `text`.
 *
 * Any run of whitespace (spaces, tabs, `\n`, `\r\n`, ...) is treated as a
 * single word separator, so blank lines or doubled spaces never count as
 * empty "words" against the limit and never leak into the output.
 *
 * @param text - Text to shorten.
 * @param wordLimit - Maximum number of words to keep (defaults to 8).
 *   Negative values are treated as 0.
 * @returns The first `wordLimit` words joined by single spaces and followed
 *   by `...` when the text contains more words than the limit; otherwise the
 *   whole text with whitespace runs collapsed to single spaces and trimmed.
 */
export const getShortenedText = (text: string, wordLimit = 8): string => {
	// Splitting on /\s+/ still yields "" for leading/trailing whitespace (and
	// for an empty input), so drop empty tokens before counting words.
	const words = text.split(/\s+/).filter((word) => word.length > 0);

	// A negative end index would make `slice` count from the end of the array,
	// silently dropping trailing words instead of limiting the preview.
	const limit = Math.max(0, wordLimit);

	if (words.length > limit) {
		return `${words.slice(0, limit).join(" ")}...`;
	}
	return words.join(" ");
};

// URL prefix for a JBJS reader page, ending in the `rsuite_id=` query
// parameter; getChapterTextAndSource appends the document's rsuite id to it.
const JBJS_FULL_ARTICLE_URL = "https://www.jbjs.org/reader.php?rsuite_id=";

/**
 * Flattens a vector document into a plain object holding its source fields,
 * its full text, a shortened version of that text, and a reader URL.
 *
 * @param doc - Document whose `metadata` and `pageContent` are read.
 * @returns An object with `sourceId`, `title`, `url`,
 *   `shortenedReferenceText`, `referenceText`, `publisher` and `author`.
 *   `url` is `JBJS_FULL_ARTICLE_URL` followed by `metadata.rsuiteId`, or
 *   `null` when `rsuiteId` is falsy.
 */
export const getChapterTextAndSource = (doc: VectorDocument) => {
	const { metadata, pageContent: referenceText } = doc;
	const {
		source_id: sourceId,
		title,
		publisher,
		author,
		rsuiteId,
	} = metadata;

	return {
		sourceId,
		title,
		// Only documents that carry an rsuite id get a link.
		url: rsuiteId ? `${JBJS_FULL_ARTICLE_URL}${rsuiteId}` : null,
		shortenedReferenceText: getShortenedText(referenceText),
		referenceText,
		publisher,
		author,
	};
};
