import React from 'react';
import SyntaxHighlighter from 'react-syntax-highlighter';
import { vs } from 'react-syntax-highlighter/dist/esm/styles/hljs';
import PageHeader2 from '../general/PageHeader2';
import PageTitle from '../general/PageTitle';
import TaskTable from '../general/TaskTable';
import CenteredIconList from '../icons/CenteredIconList';

import Figures from '../general/Figures';
import boltRealAccuracy from '../images/projects/2024_timestamp_ocr/bolt/bolt_real_cer.svg';
import boltTestAccuracy from '../images/projects/2024_timestamp_ocr/bolt/bolt_test_cer.svg';
import cliToolGif from '../images/projects/2024_timestamp_ocr/cli_tool.gif';
import correctHardImage1 from '../images/projects/2024_timestamp_ocr/correct_hard/6 21 \'03_7bbc7744-7c17-40f8-b827-db4132d37130.png';
import correctHardImage2 from '../images/projects/2024_timestamp_ocr/correct_hard/6 23 \'02_4f76b2ef-fe89-4f27-b56f-8559d6dfa264.png';
import correctHardImage3 from '../images/projects/2024_timestamp_ocr/correct_hard/7 19 \'03_349f4765-e677-4b49-9ca0-60c2e232ee0c.png';
import correctHardImage4 from '../images/projects/2024_timestamp_ocr/correct_hard/7 28 \'02_2ab3d783-cfb4-4be4-9c57-dc87e5b60d92.png';
import ghostingImage1 from '../images/projects/2024_timestamp_ocr/ghosting/1 9 00.png';
import incorrectHardImage1 from '../images/projects/2024_timestamp_ocr/incorrect_hard/1 2 \'99_62528fb4-6ed0-40b2-af1b-599071ac29cb.png'; // 'text': "12 '99", 'label': "1 2 '99"
import incorrectHardImage2 from '../images/projects/2024_timestamp_ocr/incorrect_hard/11 12 \'04_0c9937ee-ad9d-4448-874a-339a915348c1.png'; // 'text': "11 7 '04", 'label': "11 12 '04",
import incorrectHardImage3 from '../images/projects/2024_timestamp_ocr/incorrect_hard/3 2 \'03_6872ff7f-8dd0-4dd2-85f8-66f20563e3c1.png'; // 'text': "3 2 '73", 'label': "3 2 '03",
import incorrectHardImage4 from '../images/projects/2024_timestamp_ocr/incorrect_hard/9 3 \'03_59a2b69c-8b17-406a-8121-62272eb04717.png'; // 'text': "5 3 '03", 'label': "9 3 '03",
import modelImprovementChart from '../images/projects/2024_timestamp_ocr/model_improvement_chart.svg';
import syntheticImage1 from '../images/projects/2024_timestamp_ocr/synthetic/05 4 01.png';
import syntheticImage2 from '../images/projects/2024_timestamp_ocr/synthetic/3 6 49.png';
import syntheticImage3 from '../images/projects/2024_timestamp_ocr/synthetic/39 3 57.png';
import syntheticImage4 from '../images/projects/2024_timestamp_ocr/synthetic/50 09 3.png';
import trocrArchitecture from '../images/projects/2024_timestamp_ocr/trocr_architecture.jpg';

// Rows for the "Goals" TaskTable. Every goal carries the same 'complete'
// status, so only the messages are listed and the status is attached here.
const tasks = [
  'Automate date labeling',
  'Read timestamps with +90% accuracy',
  'Update photo EXIF data',
  'Update photo file name',
].map((message) => ({ message, status: 'complete' }));

// Rows for the "Future Work" TaskTable. Every entry carries the same
// 'partially' status, so only the messages are listed and the status is
// attached here.
const futureTasks = [
  'More timestamp formats',
  'More timestamp fonts',
  'More timestamp colors (I have seen purple timestamps)',
  'Hyper-parameter tuning',
  'Use newly labeled photos in eval set',
  'Timestamp location detection',
  'Test time augmentation',
  'macOS native app',
  'remove timestamps after labeling using generative fill',
].map((message) => ({ message, status: 'partially' }));

// YAML snippet rendered by the SyntaxHighlighter in the "Synthetic Data"
// section. The empty first and last entries reproduce the leading and
// trailing newline of the rendered text.
const codeString = [
  '',
  'parameters:',
  '  width_resolution: 384',
  '  alpha_max: 1.0',
  '  alpha_min: 1.0',
  '  max_rotation: 6',
  '  max_text_size: 0.22',
  '  min_text_size: 0.13',
  '  max_char_spacing: 300',
  '  min_char_spacing: 60',
  '  min_text_blur: 0',
  '  max_text_blur: 1.75',
  '  text_color_max_hue: 45',
  '  text_color_min_hue: -15',
  '  text_color_max_saturation: 255',
  '  text_color_min_saturation: 180',
  '  text_color_max_brightness: 255',
  '  text_color_min_brightness: 255',
  '  distortion_frequency_x: 3',
  '  distortion_amplitude_x: 3',
  '  distortion_frequency_y: 3',
  '  distortion_amplitude_y: 3',
  '  font_size: 500',
  '  x_offset: 15',
  '  y_offset: 10',
  '',
].join('\n');

export default function TimestampOCRPage() {
  let backgroundsLocation = 'an internal dataset of stock images when enabling training on Apple\'s compute infrastructure.';
  let training = "Training on my laptop became cumbersome after a while since I couldn't iterate fast enough. I decided to set up my project to run on an Apple compute platform. The platform allowed me to run the fine-tuning experiments in parallel as well as speeding up the task in general.";

  if (process.env.APPLE_INTERNAL === 'true') {
    backgroundsLocation = 'cvml-applenet from [trove](https://trove.apple.com/) when enabling training on [bolt.apple.com](bolt.apple.com).';
    training = "Training on my laptop became cumbersome after a while since I couldn't iterate fast enough. I decided to set up my project to run on an Apple compute platform [bolt](https://bolt.apple.com/). Bolt allowed me to run the fine-tuning experiments in parallel as well as speeding up the task in general.";
  }


  return (
    <>
      <PageTitle title="Timestamp OCR" />
      <PageHeader2 title="Summary" />
      <p>
        Timestamp OCR is a machine learning project aimed at reading seven-segment timestamps from old film photos. My motivation for this project was to chronologically order my family's approximately 2500 scanned film photos in my photo library.
      </p>
      <CenteredIconList iconNames={['pytorch', 'pillow', 'openCV', 'bolt', 'trove']} />

      <PageHeader2 title="Goals" />
      <TaskTable tasks={tasks} />

      <PageHeader2 title="Model" />
      <p>
        I fined tuned Microsoft's large base
        {' '}
        <a href="https://huggingface.co/docs/transformers/en/model_doc/trocr">trOCR</a>
        {' '}
        model from
        {' '}
        <a href="https://huggingface.co/">huggingface.co</a>
        . Over the course of the project, I experimented with various versions of the model to achieve the best results.
      </p>

      <Figures figures={[{ image: trocrArchitecture, message: 'tcOCR architecture (source: huggingface)', size: 'sm' }]} />

      <PageHeader2 title="Synthetic Data" />
      <p>
        Synthetic data generation was necessary for a couple of reasons. First, the real-world photos I had available were all taken between 1997 and 2006 and had timestamps primarily in the
        {' '}
        <span className="text-monospace badge badge-light">mm dd 'yy</span>
        {' '}
        format. If I trained exclusively on those images, the model would overfit to the specific date range I had available and perform poorly on timestamps in the less common
        {' '}
        <span className="text-monospace badge badge-light">'yy mm dd</span>
        {' '}
        format. The second reason synthetic data was necessary was because there were no datasets I could find for this specific problem, and creating my own with real data would defeat the main goal of the project: automating the process of date labeling family photos.
      </p>
      <p>
        To generate synthetic data, I primarily used
        {' '}
        <a href="https://pypi.org/project/pillow/">pillow</a>
        . Other modules used for generation/augmentation include
        {' '}
        <a href="https://albumentations.ai/">albumentations</a>
        ,
        {' '}
        <a href="https://pypi.org/project/blend-modes/">blend_modes</a>
        , and
        {' '}
        <a href="https://pypi.org/project/opencv-python/">opencv-python</a>
        .
      </p>
      <p>
        The images are generated by obtaining a random, augmented crop from a pool of background images. Random timestamp text is generated and augmented using many configurable and hardcoded parameter ranges including font, character spacing, character size, apostrophe position, color, etc. The text is then "blended" on top of the background using a random blend mode.
      </p>
      <SyntaxHighlighter language="yaml" style={vs}>
        {codeString}
      </SyntaxHighlighter>
      <p>
        Backgrounds were originally sourced from my personal photos library. I switched to
        {' '}
        {backgroundsLocation}
        {' '}
        This larger pool of backgrounds makes sure the model can generalize to unseen backgrounds.
      </p>

      <Figures figures={[{ image: syntheticImage1, message: 'synthetic examples' }, { image: syntheticImage2 }, { image: syntheticImage3 }, { image: syntheticImage4 }]} />

      <PageHeader2 title="Fine Tuning" />
      <p>
        Initially, I fine-tuned the model using rudimentary synthetic data on my laptop as a proof of concept. To evaluate the initial real-world performance, I created a dataset using 100 of my real-world images that had already been labeled. I observed that every improvement to synthetic data generation also improved real-world evaluation performance.
      </p>
      <p>
        {training}
      </p>
      <Figures figures={[{ image: boltTestAccuracy, message: 'test eval dataset (cer)' }, { image: boltRealAccuracy, message: 'real eval dataset (cer)' }]} />

      <PageHeader2 title="Results" />
      <p>
        The best model I fine-tuned has a CER (character error rate) value of 1.7% on a validation set of 1100 real-world images. (Note again, this result was achieved using only synthetic data.) Additionally, 92% of the predicted strings matched the ground truth labels exactly. Using this model to label images, a user should expect an error once every ~13 images.
      </p>
      <Figures figures={[{ image: modelImprovementChart, message: 'fine tuning CER improvement' }]} />

      <PageHeader2 title="Predictions" />
      <Figures figures={[{ image: correctHardImage1, message: 'correctly predicted: 6 21 \'03' }, { image: correctHardImage2, message: 'correctly predicted: 6 23 \'02' }, { image: correctHardImage3, message: 'correctly predicted: 7 19 \'03' }, { image: correctHardImage4, message: 'correctly predicted: 7 28 \'02' }]} />
      <Figures figures={[{ image: incorrectHardImage1, message: 'incorrectly predicted: 12 \'99' }, { image: incorrectHardImage2, message: 'incorrectly predicted: 11 7 \'04' }, { image: incorrectHardImage3, message: 'incorrectly predicted: 3 2 \'73' }, { image: incorrectHardImage4, message: 'incorrectly predicted: 5 3 \'03' }]} />

      <PageHeader2 title="Labeling CLI" />
      <p>
        To label my family photos, I created a CLI tool that takes a folder of images and presents each cropped timestamp to the user with the predicted label. If the prediction is correct, the user can press the return key to continue. If the prediction is incorrect, the user can type the correct string and press return to continue. Once the user presses enter, the photo name is postfixed with the timestamp text, and the date created EXIF data is changed to the approved date.
      </p>
      <p>
        If a timestamp crop doesn't contain the whole timestamp, the user can type "left" or "right" to move the crop window to the correct position. (In the future, I would like to automatically detect the position of the timestamp with a different model to remove the need for this feature.)
      </p>
      <Figures figures={[{ image: cliToolGif, size: 'md' }]} />

      <PageHeader2 title="Issues" />
      <p>
        I have identified one main issue with the current best model that I'm calling "ghosting". Ghosting occurs when the camera's seven-segment LEDs incorrectly expose the film before or after the original exposure to generate the correct timestamp (see example below). To account for this issue in training, I believe a new synthetic generation mode can be added that generates two timestamps, lowers the opacity of one timestamp to some fractional opacity of the other, and finally blends them in the same location on the background image. I haven't implemented this functionality yet but plan to in future work.
      </p>
      <Figures figures={[{ image: ghostingImage1, message: 'ghosting example' }]} />

      <PageHeader2 title="Future Work" />
      <TaskTable tasks={futureTasks} />
    </>
  );
}
