import {
  forwardRef,
  PropsWithChildren,
  useEffect,
  useRef,
  useState,
} from 'react';
import { useDropzone } from 'react-dropzone';
import { Box, CircularProgress, Stack, Typography } from '@mui/material';
import FileUploadOutlinedIcon from '@mui/icons-material/FileUploadOutlined';
import RestartAltIcon from '@mui/icons-material/RestartAlt';
import SettingsVoiceOutlinedIcon from '@mui/icons-material/SettingsVoiceOutlined';

import ModalCardView, { ModalCardViewCloseProps } from '../ModalCardView';
import { M3Button } from '../M3/M3Button';
import { M3Fab } from '../M3/M3Fab';
import { M3TextField } from '../M3/M3TextField';

import { useSpeechToText } from '../../hooks/global/useRealtimeSpeechToText';
import {
  useOpenAIWhisper,
  useOpenAIWhisperTranscribe,
} from '../../hooks/openai';
import { useAppProvider } from '../../providers/app/app';

/**
 * Props accepted by the audio recorder modal.
 *
 * Combines React children, the close-related props of `ModalCardView`, and
 * the recorder-specific options declared below.
 */
type AudioRecorderModalViewProps = PropsWithChildren &
  ModalCardViewCloseProps & {
    /** Form-data field name under which a dropped audio file is uploaded. */
    fileKey?: string;
    /** Called with the final text when the user presses "Submit". */
    onSubmit?: (data: DataRet) => void;
    /** Called when the user presses "Restart". */
    onReset?: () => void;
  };

/** Payload handed to `onSubmit`. */
type DataRet = {
  /** The transcribed text shown in the modal at submit time. */
  text: string;
};

/**
 * Mutable bookkeeping kept in a ref (so changes do not trigger re-renders)
 * while an uploaded file is being transcribed.
 */
type AISessionRef = {
  /** True while the transcription status is `in_progress` or `retrying`. */
  polling: boolean;
  /** Handle of the pending delayed refetch, if one is scheduled. */
  timer: ReturnType<typeof setTimeout> | null;
  /** Initialised to an empty string; not written anywhere else in this file. */
  sessionId: string;
  /** Mirror of the latest transcript text. */
  content: string;
  /** True between the component's mount and unmount. */
  mounted: boolean;
};

/**
 * Modal that produces text from audio in one of two ways:
 *
 * 1. Live recording through `useSpeechToText` (start / stop button).
 * 2. Uploading an audio file via a dropzone; the file is sent with
 *    `useOpenAIWhisper` and the result is polled with
 *    `useOpenAIWhisperTranscribe` until the status is `done` or `error`.
 *
 * @param fileKey  Form-data field name used for the uploaded file.
 * @param close    Closes the modal; invoked after a successful submit.
 * @param onReset  Invoked when the user presses "Restart".
 * @param onSubmit Invoked with `{ text }` when the user presses "Submit".
 */
export function AudioRecorderModalViewBase({
  fileKey = 'audio_file',
  close,
  onReset,
  onSubmit,
}: AudioRecorderModalViewProps) {
  const { isDarkMode } = useAppProvider();
  const [isTranscribing, setIsTranscribing] = useState(false);
  const [isDoneTranscribing, setIsDoneTranscribing] = useState(false);
  const [error, setError] = useState<null | Error>(null);
  const [text, setText] = useState('');

  const textareaRef = useRef<HTMLTextAreaElement | null>(null);
  const sessionRef = useRef<AISessionRef>({
    polling: false,
    timer: null,
    sessionId: '',
    content: '',
    mounted: false,
  });
  // Keep the ref's copy of the text in sync on every render.
  sessionRef.current.content = text;

  const speechToText = useSpeechToText();

  const aiWhisper = useOpenAIWhisper();
  const whisperSessionId = aiWhisper.data?.session_id;
  const aiWhisperTranscribe = useOpenAIWhisperTranscribe(
    {
      // The query stays disabled until a session id exists (see `enabled`),
      // so the non-null assertion is never observed with an undefined value.
      session_id: whisperSessionId!,
    },
    {
      // Only start fetching once the upload has returned a session id.
      enabled: Boolean(whisperSessionId),
    },
  );

  const audioDropzone = useDropzone({
    accept: {
      'audio/mp3': ['.mp3'],
      'audio/m4a': ['.m4a'],
      'audio/webm': ['.webm'],
      'audio/mp4': ['.mp4'],
    },
    onDrop: (acceptedFiles) => {
      const audioFile = acceptedFiles[0];
      // react-dropzone invokes onDrop even when every dropped file was
      // rejected; without this guard `undefined` would be uploaded.
      if (!audioFile) return;

      const formData = new FormData();
      formData.append(fileKey, audioFile);
      aiWhisper.mutate(formData);
    },
  });

  // Live transcript: all non-empty phrases joined into one string.
  const transcriptText = speechToText.transcript
    .filter((t) => t.phrase)
    .map((t) => t.phrase)
    .join(' ')
    .trim();

  const hasContent = speechToText.transcript.length > 0 || text.length > 0;
  const displayedText = text || transcriptText;
  const errorColor = isDarkMode
    ? 'var(--md-ref-palette-error80)'
    : 'var(--md-ref-palette-error40)';

  const startRealtimeTranscribe = async () => {
    // A fresh attempt should not stay hidden behind a previous error.
    setError(null);
    await speechToText.start();
    setIsTranscribing(true);
  };

  const stopRealtimeTranscribe = async () => {
    await speechToText.stop();
    setIsTranscribing(false);
    setIsDoneTranscribing(true);
  };

  const onRecordToggle = async () => {
    if (isTranscribing) {
      await stopRealtimeTranscribe();
    } else {
      await startRealtimeTranscribe();
    }
  };

  // NOTE(review): only the parent is notified; local state (text, error,
  // done flag) is not reset here — presumably the parent remounts the modal.
  const onRestartClick = () => {
    onReset?.();
  };

  const onSubmitClick = () => {
    onSubmit?.({ text: displayedText });
    close?.();
  };

  const renderTopPanel = () => {
    // NOTE(review): this modal converts speech to text; the heading below
    // reads "Text to speech" — confirm whether the label is intended.
    return (
      <Stack
        sx={{ flex: 1 }}
        flexDirection='row'
        alignItems='flex-start'
        justifyContent='flex-start'
      >
        <SettingsVoiceOutlinedIcon sx={{ mt: 0.3, mr: 1 }} />
        <Typography component='div' position='relative'>
          <Typography fontSize={20} fontWeight={500} component='div'>
            Text to speech
          </Typography>
        </Typography>
      </Stack>
    );
  };

  const renderBottomPanel = () => {
    const actionsDisabled = isTranscribing || !isDoneTranscribing;

    return (
      <Stack
        gap={4}
        direction='row'
        sx={{ p: 2, flex: 1 }}
        alignItems='center'
        justifyContent='center'
      >
        {hasContent && isDoneTranscribing ? (
          <Stack
            flex={1}
            direction='row'
            alignItems='center'
            justifyContent='space-between'
          >
            <M3Button
              disabled={actionsDisabled}
              onClick={onRestartClick}
              sx={{
                width: 110,
              }}
              startIcon={<RestartAltIcon />}
            >
              Restart
            </M3Button>
            <M3Button
              disabled={actionsDisabled}
              color='primary'
              variant='contained'
              onClick={onSubmitClick}
              sx={{
                width: 110,
              }}
            >
              Submit
            </M3Button>
          </Stack>
        ) : (
          <Box flex={1}>
            <M3Fab
              variant='extended'
              sx={{
                width: '100%',
              }}
              disabled={
                speechToText.loading ||
                aiWhisperTranscribe.isLoading ||
                (isTranscribing && sessionRef.current.polling)
              }
              onClick={onRecordToggle}
            >
              {speechToText.loading
                ? 'Loading ...'
                : `${speechToText.transcribing ? 'Stop' : 'Start'} Recording`}
            </M3Fab>
          </Box>
        )}
      </Stack>
    );
  };

  const renderAudioDropzone = () => {
    return (
      <Box
        {...audioDropzone.getRootProps({ className: 'dropzone' })}
        display='flex'
        alignItems='center'
        justifyContent='center'
        sx={{
          p: 4,
          height: 210,
          cursor: 'pointer',
        }}
      >
        <input {...audioDropzone.getInputProps()} />
        <Box
          display='flex'
          flexDirection='column'
          alignItems='center'
          justifyContent='center'
        >
          <FileUploadOutlinedIcon style={{ fontSize: 40 }} />
          <Typography component='div' fontSize={22} fontWeight={500} mb={1}>
            Upload audio from computer
          </Typography>
          <Typography
            component='div'
            fontSize={14}
            mb={1}
            sx={{ opacity: 0.5 }}
          >
            or drag and drop here
          </Typography>
          <Typography
            component='div'
            fontSize={14}
            mb={1}
            sx={{ opacity: 0.5 }}
          >
            (mp4, m4a, mp3 and webm)
          </Typography>
        </Box>
      </Box>
    );
  };

  const renderForm = () => {
    return (
      <Box textAlign='left'>
        <M3TextField
          sx={{
            border: 'none',
            '& .MuiInputBase-root': {
              background: isDarkMode
                ? 'var(--md-ref-palette-neutral20) !important'
                : 'var(--md-ref-palette-neutral90) !important',
              // Errors are tinted here; the value itself must stay a string.
              color: error ? errorColor : undefined,
              ':active': {
                border: 'none',
              },
              ':hover': {
                border: 'none !important',
              },
            },
            '& .MuiInputBase-input': {
              cursor: 'auto',
              fontSize: 14,
            },
            '& .MuiOutlinedInput-notchedOutline': {
              border: 'none !important',
            },
          }}
          // A textarea value has to be a string: passing a React element
          // would be rendered as "[object Object]".
          value={error ? error.message : displayedText}
          fullWidth
          multiline
          rows={8}
          inputRef={textareaRef}
          inputProps={{
            readOnly: true,
          }}
          onChange={(evt) => setText(evt.target.value)}
        />
      </Box>
    );
  };

  // React to each transcription poll result.
  useEffect(() => {
    const response = aiWhisperTranscribe.data;
    if (!response) return;

    const { status, data } = response;
    const session = sessionRef.current;
    const dataText =
      data || 'Please wait while we transcribe the audio file...';

    // Cancel any refetch that is still scheduled from a previous result.
    if (session.timer) {
      clearTimeout(session.timer);
      session.timer = null;
    }

    if (status === 'in_progress' || status === 'retrying') {
      session.content = dataText;
      session.polling = true;
      setText(dataText);
      setIsTranscribing(true);
      // Delay the next fetch: the transcription is not immediate and the
      // server needs time to update its chunk, so this avoids hammering it.
      // Dropping the cached result before refetching makes `isSuccess`
      // toggle, which is what re-runs this effect for the next result.
      session.timer = setTimeout(() => {
        aiWhisperTranscribe.remove();
        aiWhisperTranscribe.refetch();
      }, 700);
    } else if (status === 'done') {
      session.content = dataText;
      session.polling = false;
      setIsDoneTranscribing(true);
      setIsTranscribing(false);
      aiWhisperTranscribe.remove();
      setText(dataText);
    } else if (status === 'error') {
      session.polling = false;
      // Do not reuse the "please wait" placeholder as an error message.
      setError(new Error(data || 'Failed to transcribe the audio file.'));
      setIsTranscribing(false);
    }
    // Deliberately keyed on `isSuccess` only (see the polling note above).
    // eslint-disable-next-line
  }, [aiWhisperTranscribe.isSuccess, setText, setError, setIsTranscribing]);

  // Track mount state and cancel a pending refetch on unmount.
  useEffect(() => {
    const current = sessionRef.current;
    current.mounted = true;

    return () => {
      current.mounted = false;
      if (current.timer) clearTimeout(current.timer);
    };
    // eslint-disable-next-line
  }, [sessionRef]);

  // Keep the newest text in view as the transcript grows.
  useEffect(() => {
    if (textareaRef.current) {
      textareaRef.current.scrollTop = textareaRef.current.scrollHeight;
    }
  }, [textareaRef, error, text, transcriptText]);

  // Show the text area as soon as there is anything to display, including
  // an error that arrived before any text did.
  const showForm = isTranscribing || hasContent || error !== null;

  return (
    <ModalCardView
      header={renderTopPanel()}
      headerSx={{ pt: 2, pb: 2 }}
      footer={renderBottomPanel()}
      close={close}
      sx={{
        maxWidth: 480,
        minWidth: 480,
      }}
    >
      <Box sx={{ pt: 4, pb: 4 }} textAlign='center'>
        {speechToText.loading ? (
          <Box
            sx={{
              display: 'flex',
              justifyContent: 'center',
              alignItems: 'center',
              mt: 4,
            }}
          >
            <CircularProgress size={32} />
          </Box>
        ) : (
          <Typography
            component='div'
            sx={{
              borderRadius: 2,
              overflow: 'hidden',
              backgroundColor: isDarkMode
                ? 'var(--md-ref-palette-neutral20)'
                : 'var(--md-ref-palette-neutral90)',
            }}
          >
            {showForm ? renderForm() : renderAudioDropzone()}
          </Typography>
        )}
      </Box>
    </ModalCardView>
  );
}

/**
 * Default export: `AudioRecorderModalViewBase` wrapped in `forwardRef` so the
 * component can be handed a ref by its parent. The received ref is accepted
 * but not attached to any element.
 */
const AudioRecorderModalView = forwardRef(
  (modalProps: AudioRecorderModalViewProps, _ref) => {
    return <AudioRecorderModalViewBase {...modalProps} />;
  },
);

export default AudioRecorderModalView;
