import { Author } from "../api/author-cleaner-api-types";

// Words that removeStopWords drops from a name. Entries are compared as-is
// against the space-separated words of the string being cleaned.
const stopWords = new Set(["the"]);

/**
 * Strips combining diacritical marks from a string.
 *
 * The string is first decomposed to NFD so that an accented character becomes
 * a base character followed by combining marks; every code point in the range
 * U+0300–U+036F is then deleted.
 *
 * @param str - text to clean
 * @returns the text without those combining marks
 */
const removeAccents = (str: string) => {
  const decomposed = str.normalize("NFD");
  const withoutMarks = decomposed.replace(/[\u0300-\u036f]/g, "");
  return withoutMarks;
};

/**
 * Removes every stop word from a space-separated string.
 *
 * The string is split on single spaces, tokens present in `stopWords` are
 * discarded, and the remaining tokens are joined back with single spaces.
 *
 * @param str - text to filter
 * @returns the text without stop-word tokens
 */
const removeStopWords = (str: string) => {
  const kept: string[] = [];

  for (const token of str.split(" ")) {
    if (stopWords.has(token)) {
      continue;
    }
    kept.push(token);
  }

  return kept.join(" ");
};

/**
 * Deletes a fixed set of punctuation characters from a string:
 * . , / # ! $ % ^ & * ; : { } = - _ ` ~ ( )
 *
 * Characters outside that set (for example apostrophes or spaces) are kept.
 *
 * @param str - text to clean
 * @returns the text with the listed characters removed
 */
const removePunctuation = (str: string) =>
  str.replace(/[.,/#!$%^&*;:{}=\-_`~()]/g, "");

/**
 * Produces the canonical form of an author name used for comparisons.
 *
 * Steps, in order: lowercase, strip punctuation, trim surrounding whitespace,
 * strip accents, and finally drop stop words.
 *
 * @param name - raw author name
 * @returns the normalized name
 */
const normalizeAuthorName = (name: string): string => {
  const lowered = name.toLowerCase();
  const unpunctuated = removePunctuation(lowered).trim();
  const unaccented = removeAccents(unpunctuated);

  return removeStopWords(unaccented);
};

/**
 * An author name broken into parts, as produced by splitName.
 */
type Name = {
  // First space-separated word of the name.
  first: string;
  // Second word; only set by splitName when the name has more than two words.
  middle?: string;
  // Final word; only set by splitName when the name has more than one word.
  last?: string;
};
/**
 * Splits a name on single spaces into first / middle / last parts.
 *
 * - `first` is always the first word.
 * - `last` is the final word, set only when there is more than one word.
 * - `middle` is the second word, set only when there are more than two words.
 *
 * @param name - name to split
 * @returns the parts; absent parts are `undefined`
 */
const splitName = (name: string): Name => {
  const parts = name.split(" ");
  const count = parts.length;

  const result: Name = {
    first: parts[0],
    middle: undefined,
    last: undefined,
  };

  if (count > 2) {
    result.middle = parts[1];
  }
  if (count > 1) {
    result.last = parts[count - 1];
  }

  return result;
};

/**
 * Checks whether a single-word name looks like a first and last name run
 * together, in either order.
 *
 * @param nameA - split name; must have a last name for a match to be possible
 * @param nameB - single word to test
 * @returns true when `nameB` starts with one of first/last and ends with the other
 */
const isSquishedName = (nameA: Name, nameB: string): boolean => {
  const { first, last } = nameA;

  if (last === undefined) {
    return false;
  }

  const firstThenLast = nameB.startsWith(first) && nameB.endsWith(last);
  const lastThenFirst = nameB.endsWith(first) && nameB.startsWith(last);

  return firstThenLast || lastThenFirst;
};

/**
 * Checks whether a single-word name looks like a user name built from the
 * other name: first initial + last name, or last name + first initial.
 *
 * @param nameA - split name that must have a last name
 * @param nameB - split name that must NOT have a last name
 * @returns true when `nameB.first` equals one of the two derived user names
 */
const isUserName = (nameA: Name, nameB: Name): boolean => {
  // Only applies when name A has a last name but name B doesn't.
  if (nameA.last === undefined || nameB.last !== undefined) {
    return false;
  }

  const initial = nameA.first[0];
  const candidates = [`${initial}${nameA.last}`, `${nameA.last}${initial}`];

  return candidates.includes(nameB.first);
};

// Author names that hasValidName rejects; checked against the lowercased name.
const nameBlackList = new Set(["unknown", "anonymous", "anon", "none"]);

/**
 * Tells whether an author's name is usable for name matching.
 *
 * @param author - author to inspect
 * @returns false when the lowercased name is listed in `nameBlackList`
 */
const hasValidName = (author: Author) => {
  const lowered = author.name.toLowerCase();
  const isBlackListed = nameBlackList.has(lowered);

  return !isBlackListed;
};

/**
 * Decides whether two authors' names plausibly refer to the same person.
 *
 * Both names must pass `hasValidName`. They are then normalized and split,
 * and compared according to which of them has a last name:
 * - both: first/last must be equal, in the same or swapped order;
 * - exactly one: the single-word name must be a "squished" or user-name form
 *   of the full name;
 * - neither: the single words must be equal and each longer than 6 characters.
 *
 * @param authorA - first author
 * @param authorB - second author
 * @returns true when the names are considered a match
 */
const namesMatch = (authorA: Author, authorB: Author) => {
  if (!(hasValidName(authorA) && hasValidName(authorB))) {
    return false;
  }

  const a = splitName(normalizeAuthorName(authorA.name));
  const b = splitName(normalizeAuthorName(authorB.name));

  const aHasLast = a.last !== undefined;
  const bHasLast = b.last !== undefined;

  if (aHasLast && bHasLast) {
    // Accept both "FirstName LastName" and "LastName FirstName" orderings.
    const sameOrder = a.first === b.first && a.last === b.last;
    const swappedOrder = a.first === b.last && a.last === b.first;
    return sameOrder || swappedOrder;
  }

  if (aHasLast) {
    // Only A has a last name: B is a single word.
    return isSquishedName(a, b.first) || isUserName(a, b);
  }

  if (bHasLast) {
    // Only B has a last name: A is a single word.
    return isSquishedName(b, a.first) || isUserName(b, a);
  }

  // Both are single words: require some length before trusting equality.
  return a.first.length > 6 && b.first.length > 6 && a.first === b.first;
};

/**
 * Reduces an email address to a comparable form of its local part.
 *
 * The address is lowercased, everything from the first "@" onward is dropped,
 * all dots are removed from what remains, and accents are stripped.
 *
 * @param email - raw email address
 * @returns the normalized local part
 */
const normalizeEmail = (email: string): string => {
  const localPart = email.toLowerCase().split("@")[0];

  // A string pattern in replace() only removes the first occurrence, so a
  // global regex is used to drop every dot ("j.q.smith" -> "jqsmith").
  return removeAccents(localPart.replace(/\./g, ""));
};

// Substrings that make hasValidEmail reject an address (matched against the
// lowercased email), unless the address contains "users.noreply.github.com".
const emailBlackList = new Set([
  "unknown",
  "anonymous",
  "devnull",
  "localhost",
  "noreply",
]);

/**
 * Tells whether an author's email is usable for email matching.
 *
 * An address containing "users.noreply.github.com" is always accepted.
 * Otherwise the address is rejected when its lowercased form contains any
 * entry of `emailBlackList`.
 *
 * @param author - author to inspect
 * @returns true when the email is considered valid
 */
const hasValidEmail = (author: Author) => {
  const lowered = author.email.toLowerCase();

  if (lowered.includes("users.noreply.github.com")) {
    return true;
  }

  const containsBlackListed = Array.from(emailBlackList).some((term) =>
    lowered.includes(term)
  );

  return !containsBlackListed;
};

/**
 * Decides whether two authors' emails plausibly belong to the same person.
 *
 * Both emails must pass `hasValidEmail`. Identical addresses match outright;
 * otherwise the normalized local parts must be equal and each longer than
 * four characters.
 *
 * @param authorA - first author
 * @param authorB - second author
 * @returns true when the emails are considered a match
 */
const emailsMatch = (authorA: Author, authorB: Author) => {
  const bothValid = hasValidEmail(authorA) && hasValidEmail(authorB);
  if (!bothValid) {
    return false;
  }

  // Exactly equal addresses are always a match.
  if (authorA.email === authorB.email) {
    return true;
  }

  const localA = normalizeEmail(authorA.email);
  const localB = normalizeEmail(authorB.email);

  // Length of 4 is a heuristic to filter out emails like "i@..." and "mail@...".
  if (localA.length <= 4 || localB.length <= 4) {
    return false;
  }

  return localA === localB;
};

/**
 * Checks whether an email address contains both the first and last name.
 *
 * Only names with a last name, where both first and last are longer than
 * three characters, are considered. The name parts are used as given (the
 * caller in this file passes a normalized, lowercased name), so the email is
 * lowercased before the substring checks to keep the comparison
 * case-insensitive on the email side.
 *
 * @param name - split name to look for
 * @param email - raw email address to search in
 * @returns true when both name parts occur in the email
 */
const nameEmailMatch = (name: Name, email: string) => {
  if (
    name.last === undefined ||
    name.last.length <= 3 ||
    name.first.length <= 3
  ) {
    return false;
  }

  // Without lowercasing, "John.Smith@example.com" would never match the
  // lowercased name parts "john" / "smith".
  const loweredEmail = email.toLowerCase();

  return loweredEmail.includes(name.first) && loweredEmail.includes(name.last);
};

/**
 * Checks whether either author's name appears in the other author's email.
 *
 * Each name is normalized and split, then tested against the opposite
 * author's email with `nameEmailMatch`.
 *
 * @param authorA - first author
 * @param authorB - second author
 * @returns true when A's name is found in B's email, or B's name in A's email
 */
const nameMatchesEmail = (authorA: Author, authorB: Author) => {
  const parsedA = splitName(normalizeAuthorName(authorA.name));
  const parsedB = splitName(normalizeAuthorName(authorB.name));

  if (nameEmailMatch(parsedA, authorB.email)) {
    return true;
  }

  return nameEmailMatch(parsedB, authorA.email);
};

/**
 * Decides whether two authors should be treated as the same person.
 *
 * The checks run in order and stop at the first success: name match, email
 * match, then name-in-email match.
 *
 * @param authorA - first author
 * @param authorB - second author
 * @returns true when any of the three heuristics matches
 */
const isMatch = (authorA: Author, authorB: Author) => {
  if (namesMatch(authorA, authorB)) {
    return true;
  }

  if (emailsMatch(authorA, authorB)) {
    return true;
  }

  return nameMatchesEmail(authorA, authorB);
};

/**
 * Grows a group of authors with every prospective author that matches a
 * member of the group, repeating until a pass adds nobody.
 *
 * Newly added authors can themselves pull in further prospective authors on
 * the next pass, so the result is the transitive closure under `isMatch`.
 *
 * @param authorsToMerge - the starting group
 * @param prospectiveAuthors - candidates that may join the group
 * @returns the starting group followed by the matched candidates in the order
 *   they were added; the original `authorsToMerge` array is returned as-is
 *   when no candidate matches
 */
export const getAuthorMergeSuggestion = (
  authorsToMerge: Author[],
  prospectiveAuthors: Author[]
): Author[] => {
  let merged = authorsToMerge;
  let remaining = prospectiveAuthors;

  for (;;) {
    const currentGroup = merged;

    // Candidates matching at least one member of the current group.
    const matched = remaining.filter((candidate) =>
      currentGroup.some((member) => isMatch(member, candidate))
    );

    if (matched.length === 0) {
      return merged;
    }

    merged = [...merged, ...matched];
    remaining = remaining.filter((candidate) => !matched.includes(candidate));
  }
};

/**
 * A proposed merge of several author records into one.
 */
export interface AuthorMergeSuggestion {
  // The record chosen to represent the group; getSuggestionsHelper picks it
  // from authorsToMerge via chooseBestAuthor.
  author: Author;
  // Every record in the group, as returned by getAuthorMergeSuggestion.
  authorsToMerge: Author[];
}

/**
 * Scores how much an author name looks like a properly written full name.
 *
 * - 1 point when the name has exactly 2 or 3 space-separated words
 *   (first/last or first/middle/last).
 * - 0.25 points for each of the first three words that is one uppercase
 *   ASCII letter followed by one or more lowercase ASCII letters.
 *
 * @param authorName - raw author name
 * @returns a score between 0 and 1.75
 */
const gradeAuthorName = (authorName: string) => {
  const capitalizedWord = /^[A-Z][a-z]+$/;
  const parts = authorName.split(" ");

  const wordCountPoints = parts.length === 2 || parts.length === 3 ? 1 : 0;

  // Only the first three words are graded for capitalization.
  const capitalizedCount = parts
    .slice(0, 3)
    .filter((part) => capitalizedWord.test(part)).length;

  return wordCountPoints + capitalizedCount * 0.25;
};

/**
 * Scores how well an email address reflects the author's name.
 *
 * One point is awarded for each of the first three space-separated words of
 * the name (first, middle/last, last) that occurs in the email.
 *
 * The comparison is case-insensitive: names are usually capitalized while
 * email addresses are usually lowercase, so a case-sensitive check would
 * rarely award a point. Empty words (from an empty name or repeated spaces)
 * are skipped, because every string contains the empty string.
 *
 * @param email - author email address
 * @param authorName - raw author name
 * @returns a score between 0 and 3
 */
const gradeEmail = (email: string, authorName: string) => {
  const loweredEmail = email.toLowerCase();

  return authorName
    .toLowerCase()
    .split(" ")
    .slice(0, 3)
    .filter((word) => word.length > 0 && loweredEmail.includes(word)).length;
};

/**
 * Picks the author record that best represents a group.
 *
 * Each author is graded with `gradeAuthorName` and `gradeEmail`. The highest
 * name score wins; equal name scores are broken by the higher email score,
 * and on a full tie the earlier author in the list is kept.
 *
 * @param authors - candidate records
 * @returns the best-scoring author
 */
const chooseBestAuthor = (authors: Author[]) => {
  const graded = authors.map((author) => ({
    nameScore: gradeAuthorName(author.name),
    emailScore: gradeEmail(author.email, author.name),
    author,
  }));

  const winner = graded.reduce((best, candidate) => {
    const hasBetterName = candidate.nameScore > best.nameScore;
    const winsTieOnEmail =
      candidate.nameScore === best.nameScore &&
      candidate.emailScore > best.emailScore;

    return hasBetterName || winsTieOnEmail ? candidate : best;
  });

  return winner.author;
};

/**
 * Partitions authors into merge groups and records a suggestion for every
 * group with more than one member.
 *
 * The first remaining author seeds a group via `getAuthorMergeSuggestion`;
 * the group's members are then removed from the remaining list and the
 * process repeats until no authors are left.
 *
 * @param authors - authors still to be grouped
 * @param suggestions - accumulator; suggestions are pushed onto this array
 * @returns the same `suggestions` array, with the new suggestions appended
 */
const getSuggestionsHelper = (
  authors: Author[],
  suggestions: AuthorMergeSuggestion[]
): AuthorMergeSuggestion[] => {
  let remaining = authors;

  while (remaining.length > 0) {
    const [seed, ...others] = remaining;
    const group = getAuthorMergeSuggestion([seed], others);

    // A group of one means nothing matched the seed, so there is nothing to merge.
    if (group.length > 1) {
      suggestions.push({
        author: chooseBestAuthor(group),
        authorsToMerge: group,
      });
    }

    // The seed is always part of its group, so the list shrinks on every pass.
    remaining = remaining.filter((candidate) => !group.includes(candidate));
  }

  return suggestions;
};

/**
 * Computes merge suggestions for a list of authors.
 *
 * @param authors - all known author records
 * @returns one suggestion per group of two or more matching authors; an
 *   empty array when `authors` is empty
 */
export const getAuthorMergeSuggestions = (authors: Author[]) => {
  return authors.length === 0 ? [] : getSuggestionsHelper(authors, []);
};
