import { MODEL_MESSAGE_ROLES, ModelTrainingExample } from "@zall-bot/types";
import { MODEL_MESSAGE_ROLE_RECORD } from "../consts/MODEL_MESSAGE_ROLE_RECORD";
import { MODEL_NEW } from "../consts/MODEL_NEW";

/**
 * Parses a line-oriented training file into a list of training examples.
 *
 * Each row must be either exactly `MODEL_NEW` (which starts a new example) or
 * a message of the form `<short>: <content>`, where `<short>` is the `short`
 * value of one of the roles in `MODEL_MESSAGE_ROLE_RECORD`. The content of a
 * message is stored trimmed. Both LF and CRLF line endings are accepted.
 *
 * @param value - Raw text of the training file.
 * @returns The parsed examples, in file order.
 * @throws Error if the first or last row is `MODEL_NEW`, if a row is neither
 *   `MODEL_NEW` nor a recognised role message (blank rows included), if any
 *   example lacks an "ASSISTANT" message, or if there are fewer than 10
 *   examples.
 */
export const toModelTrainingExamples = (
  value: string
): ModelTrainingExample[] => {
  // Split on LF or CRLF. Splitting on "\n" alone leaves a trailing "\r" on
  // every row of a CRLF file, so separator rows would never equal MODEL_NEW.
  const rows = value.split(/\r?\n/);
  let currentTrainingExample: ModelTrainingExample = { messages: [] };
  const trainingExamples: ModelTrainingExample[] = [currentTrainingExample];

  if (rows[0] === MODEL_NEW) {
    throw new Error(`First row cannot be ${MODEL_NEW}`);
  }

  if (rows[rows.length - 1] === MODEL_NEW) {
    throw new Error(`Last row cannot be ${MODEL_NEW}`);
  }

  rows.forEach((row, index) => {
    // A separator row closes the current example and opens a fresh one.
    if (row === MODEL_NEW) {
      currentTrainingExample = { messages: [] };
      trainingExamples.push(currentTrainingExample);
      return;
    }

    for (const role of MODEL_MESSAGE_ROLES) {
      const startString = `${MODEL_MESSAGE_ROLE_RECORD[role].short}: `;

      if (row.startsWith(startString)) {
        currentTrainingExample.messages.push({
          role,
          // The prefix is known to sit at index 0, so cut it off by length.
          content: row.slice(startString.length).trim(),
        });
        return;
      }
    }

    // Reached only when the row matched neither the separator nor any role
    // prefix; report it with a 1-based row number.
    throw new Error(`#${index + 1} is invalid: ${row.trim() || "empty line"}`);
  });

  const everyHasAssistantMessage = trainingExamples.every((example) =>
    example.messages.some((message) => message.role === "ASSISTANT")
  );

  if (!everyHasAssistantMessage) {
    throw new Error(`Every example must have at least 1 assistant message`);
  }

  if (trainingExamples.length < 10) {
    throw new Error(
      `Training file has ${trainingExamples.length} examples, but must have at least 10`
    );
  }

  return trainingExamples;
};
