#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <float.h>
#include <string.h>

/* Overlays a double with an array of two ints so that the raw storage of a
   floating-point value can be read word by word (see isSignBitSet).
   NOTE(review): this presumably assumes a 32-bit int so that i[] covers the
   64-bit double exactly - confirm for the target platform. */
typedef union {
  double d;   /* the value viewed as a double */
  int i[2];   /* the same storage viewed as two ints */
} d_cast;

/* Pi.  Written as a literal (the same value M_PI expands to) because M_PI is
   not part of ISO C and is hidden by strict -std=c11 builds. */
static const double F_PI = 3.14159265358979323846;
/* Machine epsilon for double, i.e. ::std::numeric_limits<double>::epsilon().
   Used as the relative convergence tolerance of the continued fraction. */
static const double fMachEps = DBL_EPSILON;
static const double fMaxGammaArgument = 171.624376956302;  // found experimental
/* Poor man's booleans.  NOTE: these names become keywords in C23. */
static const int false = 0;
static const int true = 1;
/* log(DBL_MAX) and log(DBL_MIN); filled in by main() before any use. */
static double fLogDblMax;
static double fLogDblMin;

/* Evaluates the rational Lanczos sum fNum(fZ) / fDenom(fZ), where both are
   degree-12 polynomials given by the coefficient tables below (index k is
   the coefficient of fZ^k).

   For fZ <= 1 both polynomials are evaluated directly with the Horner
   scheme.  Otherwise numerator and denominator are both divided by fZ^12
   and the Horner scheme runs over the reversed coefficients in 1/fZ. */
static double
lcl_getLanczosSum (double fZ)
{
  enum { kCoefCount = 13 };
  static const double fNum[kCoefCount] = {
    23531376880.41075968857200767445163675473,
    42919803642.64909876895789904700198885093,
    35711959237.35566804944018545154716670596,
    17921034426.03720969991975575445893111267,
    6039542586.35202800506429164430729792107,
    1439720407.311721673663223072794912393972,
    248874557.8620541565114603864132294232163,
    31426415.58540019438061423162831820536287,
    2876370.628935372441225409051620849613599,
    186056.2653952234950402949897160456992822,
    8071.672002365816210638002902272250613822,
    210.8242777515793458725097339207133627117,
    2.506628274631000270164908177133837338626
  };
  static const double fDenom[kCoefCount] = {
    0,
    39916800,
    120543840,
    150917976,
    105258076,
    45995730,
    13339535,
    2637558,
    357423,
    32670,
    1925,
    66,
    1
  };
  double num;
  double den;
  int k;

  if (fZ <= 1.0) {
    // Plain Horner scheme, highest coefficient first.
    num = fNum[kCoefCount - 1];
    den = fDenom[kCoefCount - 1];
    k = kCoefCount - 1;
    while (k-- > 0) {
      num *= fZ;
      num += fNum[k];
      den *= fZ;
      den += fDenom[k];
    }
    return num/den;
  }

  // Cancel down with fZ^12: Horner scheme in 1/fZ, lowest coefficient first.
  const double inv = 1/fZ;
  num = fNum[0];
  den = fDenom[0];
  for (k = 1; k < kCoefCount; ++k) {
    num *= inv;
    num += fNum[k];
    den *= inv;
    den += fDenom[k];
  }
  return num/den;
}

/* Evaluates the continued fraction that GetBetaDist multiplies into the
   beta distribution value.  Each pass of the loop applies two recurrence
   steps and renormalises by the current denominator.  Iteration stops when
   two successive values agree to within a relative tolerance of fMachEps,
   or when the iteration limit is reached. */
static double
lcl_GetBetaHelperContFrac (double fX, double fA, double fB)
{
  // Loop security: normal cases converge in less than 100 iterations.
  // FIXME: many iterations are needed for fX near the mean; no better
  // algorithm is known here.
  const double kIterLimit = 50000.0;

  double numPrev = 1.0;
  double denPrev = 1.0;
  double numCur;
  double denCur = 1.0 - (fA+fB)/(fA+1.0)*fX;
  double scale;
  double frac;
  double fracNext = 1.0;
  double step = 1.0;
  int converged = false;

  if (denCur == 0.0) {
    numCur = 0.0;
    scale = 1.0;
    frac = 1.0;
  } else {
    numCur = 1.0;
    scale = 1.0/denCur;
    frac = numCur*scale;
  }

  for (;;) {
    const double aPlus2m = fA + 2.0*step;
    const double coefEven = step*(fB-step)*fX/((aPlus2m-1.0)*aPlus2m);
    const double coefOdd = -(fA+step)*(fA+fB+step)*fX/(aPlus2m*(aPlus2m+1.0));

    numPrev = (numCur+coefEven*numPrev)*scale;
    denPrev = (denCur+coefEven*denPrev)*scale;
    numCur = numPrev + coefOdd*numCur*scale;
    denCur = denPrev + coefOdd*denCur*scale;

    // A zero denominator keeps the previous estimate and scale.
    if (denCur != 0.0) {
      scale = 1.0/denCur;
      fracNext = numCur*scale;
      converged = (fabs(frac-fracNext) < fabs(frac)*fMachEps);
    }
    frac = fracNext;
    step += 1.0;

    if (!(step < kIterLimit) || converged)
      break;
  }
  return frac;
}

/* Returns the logarithm of the Lanczos gamma approximation for fZ:
   log(lanczosSum(fZ)) + (fZ - 0.5) * log(fZ + g - 0.5) - (fZ + g - 0.5). */
static double
lcl_GetLogGammaHelper (double fZ)
{
  const double kLanczosG = 6.024680040776729583740234375;
  const double shifted = fZ + kLanczosG - 0.5;
  const double logSum = log (lcl_getLanczosSum (fZ));

  return logSum + (fZ-0.5) * log (shifted) - shifted;
}

/* Returns 1 if the sign bit of d is set (negative values, including -0.0),
   0 otherwise.

   Uses the standard signbit() macro.  Reading the first int of a
   double/int[2] overlay only sees the sign on big-endian layouts; on
   little-endian machines that word holds low mantissa bits, so negative
   values were reported as positive. */
static int
isSignBitSet (double d)
{
  return signbit (d) != 0;
}

static int const n10Count = 16;
static double const n10s[2][16] = {
  { 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8,
    1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16 },
  { 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8,
    1e-9, 1e-10, 1e-11, 1e-12, 1e-13, 1e-14, 1e-15, 1e-16 }
};

static double
getN10Exp (int nExp)
{
  if (nExp < 0) {
    if (-nExp <= n10Count)
      return n10s[1][-nExp-1];
    else
      return pow (10.0, nExp);
  } else if (nExp > 0) {
    if (nExp <= n10Count)
      return n10s[0][nExp-1];
    else
      return pow (10.0, nExp);
  }
  else // ( nExp == 0 )
    return 1.0;
}

/* Rounds fValue to 15 significant decimal digits.

   The value is scaled by a power of ten so that 15 digits sit in front of
   the decimal point, rounded to the nearest integer, and scaled back.

   Zero, infinities and NaN are returned unchanged: for them log10() does
   not give a usable exponent, and converting its result to int would be
   undefined.  If the scaling itself overflows (inputs near DBL_MIN or
   DBL_MAX) the original value is returned unchanged as well. */
double
approxValue (double fValue)
{
  if (fValue == 0.0 || !isfinite (fValue))
    // We don't handle these conditions.  Bail out.
    return fValue;

  const double fOrigValue = fValue;

  int bSign = isSignBitSet (fValue);
  if (bSign)
    fValue = -fValue;

  int nExp = (int) floor (log10 (fValue));
  nExp = 14 - nExp;
  double fExpValue = getN10Exp (nExp);

  fValue *= fExpValue;
  // If the original value was near DBL_MIN the scale factor overflowed.
  // Restore and bail out.
  if (!isfinite (fValue))
    return fOrigValue;

  fValue = round (fValue);
  fValue /= fExpValue;
  // If the original value was near DBL_MAX scaling back overflowed.
  // Restore and bail out.
  if (!isfinite (fValue))
    return fOrigValue;

  return bSign ? -fValue : fValue;
}

/* Lanczos approximation of the gamma function:
   lanczosSum(fZ) * (fZ + g - 0.5)^(fZ - 0.5) / exp(fZ + g - 0.5).
   The power is applied as two half powers around the division so that the
   intermediate product does not overflow.  For arguments up to 20 that are
   whole numbers (after approxValue) the result is rounded to an integer. */
double
lcl_GetGammaHelper (double fZ)
{
  const double kLanczosG = 6.024680040776729583740234375;
  const double shifted = fZ + kLanczosG - 0.5;
  // avoid intermediate overflow: multiply by the half power twice
  const double halfPower = pow (shifted, fZ / 2 - 0.25);

  double result = lcl_getLanczosSum (fZ);
  result *= halfPower;
  result /= exp (shifted);
  result *= halfPower;

  const int isSmallWhole = fZ <= 20.0 && fZ == floor (approxValue (fZ));
  return isSmallWhole ? round (result) : result;
}

/* Gamma function for arbitrary real fZ, built on lcl_GetGammaHelper (which
   is only used for arguments >= 1):

     fZ >= 1      direct evaluation
     fZ >= 0.5    Gamma(x) = Gamma(x+1) / x
     fZ >= -0.5   Gamma(x) = Gamma(x+2) / (x+1) / x   (may overflow near 0)
     otherwise    Euler's reflection formula, evaluated in logarithms;
                  returns 0.0 when the result would underflow.

   fLogDblMax must have been initialised (main does this) before the
   reflection branch is used. */
static double
GetGamma (double fZ)
{
  if (fZ >= 1.0)
    return lcl_GetGammaHelper (fZ);

  if (fZ >= 0.5)  // shift to x>=1 using Gamma(x)=Gamma(x+1)/x
    return lcl_GetGammaHelper (fZ+1) / fZ;

  if (fZ >= -0.5)
    // shift to x>=1, might overflow
    return lcl_GetGammaHelper (fZ+2) / (fZ+1) / fZ;

  // fZ<-0.5
  // Use Euler's reflection formula: gamma(x)= pi/ ( gamma(1-x)*sin(pi*x) )
  const double fLogPi = log (F_PI);
  const double fSinPiZ = sin (F_PI*fZ);
  const double fLogDivisor = lcl_GetLogGammaHelper (1-fZ) + log (fabs (fSinPiZ));
  if (fLogDivisor - fLogPi >= fLogDblMax)     // underflow
    return 0.0;

  // The logarithm dropped the sign of sin(pi*x); restore it here.
  return exp (fLogPi - fLogDivisor) * ((fSinPiZ < 0.0) ? -1.0 : 1.0);
}

/* Beta function B(fAlpha, fBeta).

   While the sum of the arguments stays below fMaxGammaArgument the value
   is formed directly from three GetGamma calls.  For larger arguments the
   three Lanczos expressions are combined by hand, with the factors
   rearranged so that no intermediate result overflows. */
static double
GetBeta (double fAlpha, double fBeta)
{
  // Order the arguments so that hi >= lo.
  const double hi = (fAlpha > fBeta) ? fAlpha : fBeta;
  const double lo = (fAlpha > fBeta) ? fBeta : fAlpha;

  if (hi+lo < fMaxGammaArgument) {
    // simple case
    const double ratio = GetGamma(hi) / GetGamma(hi+lo);
    return ratio*GetGamma(lo);
  }

  // Need logarithms.  GetLogGamma is not accurate enough, so go back to
  // Lanczos for all three gamma values and arrange the factors anew.
  const double kLanczosG = 6.024680040776729583740234375; // same g as lcl_GetGammaHelper
  const double gShift = kLanczosG - 0.5;
  const double shiftedSum = hi+lo+gShift;

  double lanczos = lcl_getLanczosSum (hi);
  lanczos /= lcl_getLanczosSum (hi+lo);
  lanczos *= lcl_getLanczosSum (lo);
  lanczos *= sqrt ((shiftedSum/(hi+gShift))/(lo+gShift));

  // (hi+gShift)/shiftedSum = 1 / ( 1 + lo/(hi+gShift) ), hence log1p below
  const double ratioHi = lo/(hi+gShift);
  const double ratioLo = hi/(lo+gShift);

  return exp (-hi * log1p(ratioHi) - lo * log1p (ratioLo) - gShift) * lanczos;
}

/* Natural logarithm of the beta function B(fAlpha, fBeta), assembled from
   the Lanczos expressions of the three gamma values so that it stays
   usable where the beta function itself would over- or underflow. */
static double
GetLogBeta (double fAlpha, double fBeta)
{
  const double kLanczosG = 6.024680040776729583740234375; // same g as lcl_GetGammaHelper
  const double gShift = kLanczosG - 0.5;

  // Order the arguments so that hi >= lo.
  const double hi = (fAlpha > fBeta) ? fAlpha : fBeta;
  const double lo = (fAlpha > fBeta) ? fBeta : fAlpha;

  double lanczos = lcl_getLanczosSum (hi);
  lanczos /= lcl_getLanczosSum (hi+lo);
  lanczos *= lcl_getLanczosSum (lo);

  const double shiftedSum = hi+lo+gShift;
  double logPrefactor = log (lanczos);
  logPrefactor += 0.5*(log (shiftedSum) - log (hi+gShift) - log (lo+gShift));

  // (hi+gShift)/shiftedSum = 1 / ( 1 + lo/(hi+gShift) ), hence log1p below
  const double ratioHi = lo/(hi+gShift);
  const double ratioLo = hi/(lo+gShift);

  double logBeta = -hi * log1p(ratioHi) - lo * log1p (ratioLo) - gShift;
  logBeta += logPrefactor;
  return logBeta;
}

/* Density of the beta distribution at fX for shape parameters fA and fB:
   x^(a-1) * (1-x)^(b-1) / Beta(a,b).

   The cases a == 1 and b == 1 are handled by closed forms; outside the
   open interval (0,1) the density is reported as 0.  In the general case
   the product form is used when none of its parts over- or underflows,
   otherwise the whole expression is evaluated in logarithms.

   Reads fLogDblMax / fLogDblMin, which main initialises. */
static double
GetBetaDistPDF (double fX, double fA, double fB)
{
  if (fA == 1.0) {
    // result b*(1-x)^(b-1)
    if (fB == 1.0)
      return 1.0;
    if (fB == 2.0)
      return -2.0*fX + 2.0;
    return (fX <= 0.01)
      ? fB + fB * expm1 ((fB-1.0) * log1p (-fX))
      : fB * pow (0.5-fX+0.5, fB-1.0);
  }

  if (fB == 1.0)
    // result a*x^(a-1)
    return (fA == 2.0) ? fA * fX : fA * pow(fX, fA-1);

  if (fX <= 0.0 || fX >= 1.0)
    return 0.0;

  // normal cases; result x^(a-1)*(1-x)^(b-1)/Beta(a,b)
  const double logOneMinusX = (fX < 0.1) ? log1p (-fX) : log (0.5-fX+0.5);
  const double logX = log (fX);
  const double expA = fA-1.0;
  const double expB = fB-1.0;
  const double logBeta = GetLogBeta (fA,fB);

  // check whether parts over- or underflow
  const int xPartFits = expA * logX < fLogDblMax && expA * logX > fLogDblMin;
  const int yPartFits = expB * logOneMinusX < fLogDblMax
                        && expB * logOneMinusX > fLogDblMin;
  const int betaFits = logBeta < fLogDblMax && logBeta > fLogDblMin;

  if (xPartFits && yPartFits && betaFits)
    return pow (fX,fA-1.0) * pow (0.5-fX+0.5,fB-1.0) / GetBeta (fA,fB);

  // need logarithm;
  // might overflow as a whole, but seldom, not worth to pre-detect it
  return exp((fA-1.0)*logX + (fB-1.0)* logOneMinusX - logBeta);
}

/* Cumulative beta distribution at fXin for shape parameters fAlpha and
   fBeta.  The result is clamped to [0, 1].

   Closed forms are used for x <= 0, x >= 1, beta == 1 (x^alpha) and
   alpha == 1 (1 - (1-x)^beta).  Otherwise the value is the continued
   fraction from lcl_GetBetaHelperContFrac times x^a * (1-x)^b / (a*B(a,b));
   when x lies above alpha/(alpha+beta) the problem is reflected
   (x -> 1-x, a <-> b) and the result is subtracted from 1. */
static double
GetBetaDist (double fXin, double fAlpha, double fBeta)
{
  // special cases
  if (fXin <= 0.0)  // values are valid, see spec
    return 0.0;
  if (fXin >= 1.0)  // values are valid, see spec
    return 1.0;
  if (fBeta == 1.0)
    return pow(fXin, fAlpha);
  if (fAlpha == 1.0)
    // 1.0 - pow(1.0-fX,fBeta) is not accurate enough, so it is computed as
    // -expm1(beta*log1p(-x)).  The leading minus is essential:
    // expm1(...) alone is (1-x)^beta - 1, which is negative.
    return -expm1 (fBeta * log1p (-fXin));
  //FIXME: need special algorithm for fX near fP for large fA,fB
  double fResult;
  // The continued fraction is always used; power series are neither
  // faster nor more accurate.
  double fY = (0.5-fXin)+0.5;
  double flnY = log1p (-fXin);
  double fX = fXin;
  double flnX = log (fXin);
  double fA = fAlpha;
  double fB = fBeta;
  int bReflect = fXin > fAlpha/(fAlpha+fBeta);
  if (bReflect) {
    // Swap the roles of x and 1-x and of the two shape parameters.
    fA = fBeta;
    fB = fAlpha;
    fX = fY;
    fY = fXin;
    flnX = flnY;
    flnY = log(fXin);
  }
  fResult = lcl_GetBetaHelperContFrac (fX,fA,fB);
  fResult = fResult/fA;
  double fP = fA/(fA+fB);
  double fQ = fB/(fA+fB);
  double fTemp;
  if (fA > 1.0 && fB > 1.0 && fP < 0.97 && fQ < 0.97) //found experimental
    fTemp = GetBetaDistPDF (fX,fA,fB)*fX*fY;
  else
    fTemp = exp (fA*flnX + fB*flnY - GetLogBeta (fA,fB));
  fResult *= fTemp;
  if (bReflect)
    fResult = 0.5 - fResult + 0.5;
  if (fResult > 1.0) // ensure valid range
    fResult = 1.0;
  if (fResult < 0.0)
    fResult = 0.0;
  return fResult;
}

/* Returns half of the beta distribution value
   GetBetaDist (fDF/(fDF+fT^2), fDF/2, 1/2) for a t statistic fT with fDF
   degrees of freedom; main reports this as the p-value. */
static double
GetTDist(double fT, double fDF)
{
    const double fBetaArg = fDF/(fDF+fT*fT);
    const double fHalfDF = fDF/2.0;

    return 0.5 * GetBetaDist (fBetaArg, fHalfDF, 0.5);
}

static int
CalculateTest (double *pMat1, double *pMat2, int n, double *fT, double *fDF)
{
  double fCount1  = 0.0;
  double fCount2  = 0.0;
  double fSum1    = 0.0;
  double fSumSqr1 = 0.0;
  double fSum2    = 0.0;
  double fSumSqr2 = 0.0;
  double fVal;
  int i;

  for (i = 0; i < n; i++) {
    fVal = pMat1[i];
    fSum1    += fVal;
    fSumSqr1 += fVal * fVal;
    fCount1++;
  }
  for (i = 0; i < n; i++) {
    fVal = pMat2[i];
    fSum2    += fVal;
    fSumSqr2 += fVal * fVal;
    fCount2++;
  }
  //  laut Bronstein-Semendjajew
  double fS1 = (fSumSqr1 - fSum1*fSum1/fCount1) / (fCount1 - 1.0);    // Varianz
  double fS2 = (fSumSqr2 - fSum2*fSum2/fCount2) / (fCount2 - 1.0);
  *fT = fabs (fSum1/fCount1 - fSum2/fCount2) /
    sqrt ((fCount1-1.0)*fS1 + (fCount2-1.0)*fS2) *
    sqrt (fCount1*fCount2*(fCount1+fCount2-2)/(fCount1+fCount2));
  *fDF = fCount1 + fCount2 - 2;

  return true;
}

/* Arithmetic mean of the first n elements of A.
   With n == 0 the division 0.0 / 0 yields NaN. */
static double
average (double *A, int n)
{
  double total = 0.0;
  int k = 0;

  while (k < n) {
    total += A[k];
    k++;
  }
  return total / n;
}

/* Sample standard deviation of the first n elements of A: the square root
   of the summed squared deviations from the mean, divided by n - 1. */
static double
stdev (double *A, int n)
{
  const double mean = average (A, n);
  double sumSq = 0.0;
  int k;

  for (k = 0; k < n; ++k) {
    const double diff = mean - A[k];
    sumSq += diff * diff;
  }
  return sqrt (sumSq / (n - 1));
}

/* Signed t statistic for two samples A and B of n values each:
   (mean(A) - mean(B)) / (s * sqrt(2/n)), where s is the square root of the
   average of the two sample variances. */
static double
tvalue (double *A, double *B, int n)
{
  const double meanA = average (A, n);
  const double meanB = average (B, n);
  const double devA = stdev (A, n);
  const double devB = stdev (B, n);

  const double pooled = sqrt ((devA*devA + devB*devB) / 2.0);

  return (meanA - meanB) / (pooled * sqrt (2.0 / n));
}

/* Degrees of freedom for two samples A and B of n values each, using the
   Welch-Satterthwaite formula and rounding to the nearest integer:

     (vA/n + vB/n)^2 / ((vA/n)^2/(n-1) + (vB/n)^2/(n-1))

   where vA and vB are the sample variances.

   Returns 0 when the value is undefined: n < 2, or a non-finite result
   (for example both variances zero).  Converting such a value to int
   would be undefined behaviour. */
static int
degrees_freedom (double *A, double *B, int n)
{
  if (n < 2)
    return 0;

  const double dA = stdev (A, n);
  const double dB = stdev (B, n);

  // Variance of each sample mean.
  const double fA = dA * dA / n;
  const double fB = dB * dB / n;

  const double df =
    ((fA + fB) * (fA + fB)) / ((fA*fA)/(n - 1) + (fB*fB)/(n - 1));
  if (!isfinite (df))
    return 0;

  return (int) round (df);
}

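/*
  Worked example.  main() expects each sample as ONE whitespace-separated
  argument, so the values (originally listed interleaved A B A B ... as
  "./ttest 996 995 997 994 998 997 995 993") are passed as:

    ./ttest "996 997 998 995" "995 994 997 993"

  We want to reproduce the Excel results:

  aA = 996.50
  aB = 994.75
  dA = 1.2909944487
  dB = 1.7078251277
  T-value = 1.6348
  df = 6
  One-tailed p-value = 0.0765991975

  Probability mass of Student's t distribution between -t and t:

    A(t | df) = \frac{1}{\sqrt{df} \, B(1/2, df/2)}
                \int_{-t}^{t} \left( 1 + \frac{x^2}{df} \right)^{-(df+1)/2} dx

  where
      df = degrees of freedom
      t  = T-value (the integration limit)
      x  = integration variable
      B  = beta function

  The one-tailed p-value is (1 - A(t | df)) / 2.
*/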

/* Maximum number of values scan_arg stores.  This matches the size of the
   sample buffers declared in main. */
enum { SCAN_ARG_MAX = 32 };

/* Splits s in place at blanks, tabs and newlines and converts each token
   to a double, storing the values in vec[0], vec[1], ...

   s    writable string; strtok overwrites the separators with NULs
   vec  output buffer; at most SCAN_ARG_MAX values are stored, so a buffer
        of SCAN_ARG_MAX elements can never be overrun

   Returns the number of values stored.  Tokens beyond SCAN_ARG_MAX are
   ignored.  A token that is not a number is stored as 0.0, as atof did.
   Returns 0 if s or vec is NULL. */
static int
scan_arg (char *s, double *vec)
{
  int count = 0;
  char *tok;

  if (s == NULL || vec == NULL)
    return 0;

  for (tok = strtok (s, " \t\n");
       tok != NULL && count < SCAN_ARG_MAX;
       tok = strtok (NULL, " \t\n"))
    vec[count++] = strtod (tok, NULL);

  return count;
}

/* Usage: ttest [-v] "a1 a2 ..." "b1 b2 ..."

   Each sample is given as one whitespace-separated argument.  The first
   such argument fills sample A, any later one fills sample B.  Prints the
   p-value as a percentage, or with -v the t-value, degrees of freedom and
   p-value.

   Both samples must contain the same number of values, at least two.
   Otherwise a usage message is written to stderr and the program exits
   with EXIT_FAILURE instead of computing on uninitialised data. */
int
main (int argc, char *argv[])
{
  double A[32];
  double B[32];
  int i;
  int nA = 0;
  int nB = 0;
  double fT, fDF;
  double p_value;
  int verbose = 0;

  for (i = 1; i < argc; i++) {
    if (strcmp (argv[i], "-v") == 0) {
      verbose = 1;
    } else if (nA == 0) {
      /* Collect vector A */
      nA = scan_arg (argv[i], A);
    } else {
      /* Collect vector B */
      nB = scan_arg (argv[i], B);
    }
  }

  /* The counts are tracked separately so that a length mismatch is
     detected rather than silently using B's length for both samples. */
  if (nA < 2 || nA != nB) {
    fprintf (stderr,
             "usage: %s [-v] \"a1 a2 ...\" \"b1 b2 ...\"\n"
             "both samples need the same number of values (at least 2)\n",
             argc > 0 ? argv[0] : "ttest");
    return EXIT_FAILURE;
  }

  /* Overflow limits consulted by the gamma and beta routines. */
  fLogDblMax = log (DBL_MAX);
  fLogDblMin = log (DBL_MIN);

  if (!CalculateTest (A, B, nA, &fT, &fDF)) {
    fprintf (stderr, "t-test could not be computed\n");
    return EXIT_FAILURE;
  }
  p_value = GetTDist (fT, fDF);

  if (verbose) {
    printf ("t-value: %f\n", fT);
    printf ("D.F.:    %f\n", fDF);
    printf ("p-value: %f\n", p_value);
  } else {
    printf ("%.1f\n", p_value * 100.0);
  }

  return EXIT_SUCCESS;
}
