#include "locus.h"

// Sizes the working buffers of a locus.
//   nRaw : length of the raw per-sample vectors filled while reading a row.
//   n    : length of the per-sample result vectors written to the output files.
locus::locus(int nRaw, int n)
{
  // Raw per-sample buffers, one entry per sample read from the input row.
  vectorRawA = arma::vec(nRaw);
  vectorRawB = arma::vec(nRaw);
  vectorRawF = arma::vec(nRaw);

  // Result buffers (genotypes, genotype scores, CNV scores).
  rgenotypes = arma::ivec(n);
  rgSC = arma::vec(n);
  rSC = arma::vec(n);

  nindividusRaw = nRaw;
  nindividus = n;
}

// No explicit cleanup is performed here.
locus::~locus() {}

/**
 * Reads one locus row from `fitxer`, normalises its two intensity channels,
 * genotypes the locus and writes the results.
 *
 * Row layout as consumed here: name, chromosome, position, then
 * `nindividusRaw` pairs of tokens (channel A, channel B). A token containing
 * "NeuN" marks a missing value and is stored as -1.
 *
 * @param fitxer      input stream positioned at the start of a row; if the
 *                    locus name cannot be read the function returns silently.
 * @param outliers    indices into the raw sample vectors; the entries at these
 *                    indices are the ones selected for analysis.
 * @param fgenotypes  receives the genotype line (see escriu_genotype).
 * @param fgscores    receives the genotype-score line (see escriu_genotype_sc).
 * @param fcnvscores  receives the CNV-score line when o.og == 0.
 * @param flog        receives a message when the locus is omitted.
 *
 * Malformed input is reported through escriuError. Failures during
 * normalisation, genotyping or CNV scoring are caught, logged and the locus is
 * omitted.
 */
void locus::carregaDades(ifstream & fitxer, arma::uvec & outliers, ofstream & fgenotypes, ofstream & fgscores, ofstream & fcnvscores, ofstream & flog)
{
  // No locus name available: nothing (more) to process.
  if ((fitxer >> nom).fail())
  {
    return;
  }

  fitxer >> chr;
  if ((fitxer >> posicio).fail())
  {
    escriuError("Error: The input file is not properly formated.");
  }

  // Channel 0 fills vectorRawA, channel 1 fills vectorRawB; the labels are the
  // diagnostics printed when a token of that channel cannot be read or parsed.
  const char * const etiquetes[2] = { "PositionX: ", "PositionY: " };
  string val;
  istringstream inp;
  for (int i = 0; i < nindividusRaw; i++)
  {
    for (int canal = 0; canal < 2; canal++)
    {
      // A missing token (truncated row) must not silently reuse the previous
      // token's text, so it is reported like any other format error.
      if ((fitxer >> val).fail())
      {
        std::cout << etiquetes[canal] << i << std::endl;
        escriuError("Error: The input file is not properly formated.");
        return; // in case escriuError returns: the row cannot be completed
      }

      double valor = -1; // value stored for "NeuN" (missing) tokens
      if (val.find("NeuN") == string::npos)
      {
        inp.clear(); // drop eof/fail flags left by the previous token
        inp.str(val);
        if ((inp >> valor).fail())
        {
          std::cout << etiquetes[canal] << i << std::endl;
          escriuError("Error: The input file is not properly formated.");
        }
      }

      if (canal == 0)
      {
        vectorRawA(i) = valor;
      }
      else
      {
        vectorRawB(i) = valor;
      }
    }
  }

  // Keep only the requested samples, then drop those with a negative value in
  // either channel (this removes the -1 "missing" markers).
  vectorA = vectorRawA.elem(outliers);
  vectorB = vectorRawB.elem(outliers);

  filtre = arma::find( (vectorA >= 0) % (vectorB >= 0) );

  vectorA = vectorA.elem(filtre);
  vectorB = vectorB.elem(filtre);

  // Raise every intensity to at least o.sens.
  for(unsigned int i = 0; i < vectorA.n_elem; i++)
  {
    vectorA(i) = (vectorA(i)<o.sens?o.sens:vectorA(i));
    vectorB(i) = (vectorB(i)<o.sens?o.sens:vectorB(i));
  }

  // Angle of (A, B) rescaled to [0, 1]; computed before normalisation.
  vectorF = arma::atan(vectorB/vectorA)*2./MY_PI;

  arma::vec suma = vectorA + vectorB;
  arma::uvec temp = ( suma > o.mic);                           // samples with A+B above o.mic
  arma::uvec utemp = arma::find(temp);
  arma::uvec iA = arma::find( (vectorF < o.mac) % temp );      // low-angle samples
  arma::uvec iB = arma::find( (vectorF > 1. - o.mac) % temp ); // high-angle samples

  // Normalisation is inside the try block as well: Armadillo may throw on
  // degenerate input (for instance when no sample survived the filter), and
  // such a locus should be logged and omitted like any other failure.
  try{
    if( iA.n_elem>o.mhc && iB.n_elem>o.mhc )
    {
      // Both extreme groups are populated: scale each channel by its own group.
      vectorA = vectorA/findnormal(vectorA.elem(iA));
      vectorB = vectorB/findnormal(vectorB.elem(iB));
    }
    else if( iA.n_elem>o.mhc && iB.n_elem<=o.mhc )
    {
      refind(vectorF, vectorA, vectorB, utemp);
    }
    else if( iA.n_elem<=o.mhc && iB.n_elem>o.mhc )
    {
      // Same procedure with the roles of A and B swapped.
      arma::vec vtemp = arma::atan(vectorA/vectorB)*2./MY_PI;
      refind(vtemp, vectorB, vectorA, utemp);
    }
    else
    {
      // Neither group is populated enough: scale both channels by mean(A+B).
      double dtemp = arma::mean(suma);
      vectorA = vectorA / dtemp;
      vectorB = vectorB / dtemp;
    }

    arma::vec sumaAB = vectorA + vectorB;

    gt.FI1 = vectorF;
    gt.FI2 = sumaAB;
    gt.res = o.res;
    gt.minlen = 0.2;
    gt.zeroD = o.zeroD;
    gt.b = 0;
    gt.farciment = generaRang(0., 0.2, 0.001);
    gt.troba_genotypes();

    if(gt.error == 0 && gt.qc >= o.gqt)
    {
      escriu_genotype(fgenotypes);
      escriu_genotype_sc(fgscores);
    
      if(o.og == 0)
      {
        cnv.FI1 = vectorF;
        cnv.FI2 = sumaAB;
        cnv.genotypes = gt.genotypes;
        cnv.scg = gt.sc;
        cnv.INT1 = vectorA;
        cnv.INT2 = vectorB;
        cnv.stdCNP = 8;
        cnv.troba_cnvscoring();
    
        escriu_cnvscores(fcnvscores);
      }
    }
    else
    {
      if(gt.error == 0)
      {
        std::cout << "The locus " << nom << " has low SNP QC score (QC < gtc). It is omitted. See the [ CNstream.log ] file." << std::endl;
        flog << "The locus " << nom << " has low SNP QC score (QC < gtc). It is omitted." << std::endl;
      }
      else
      {
        std::cout << "The locus " << nom << " can not be properly genotyped. It is omitted. See the [ CNstream.log ] file." << std::endl;
        flog << "The locus " << nom << " can not be properly genotyped. It is omitted." << std::endl;
      }
    }
  }
  catch(...)
  {
    std::cout << "The locus " << nom << " can not be properly computed. It is omitted. See the [ CNstream.log ] file." << std::endl;
    flog << "The locus " << nom << " can not be properly computed. It is omitted." << std::endl;
  }
}

// Returns a reference level for the values in v.
//  - Up to 50 values: the median of v.
//  - More than 50 values: v is histogrammed on n_elem/10 evenly spaced edges
//    from 0 to max(v). The edge maximising count * edge^3 is located, and the
//    centre of the most populated bin whose edge lies within
//    (edge - 0.05, edge + 0.25) of it is returned.
double locus::findnormal(arma::vec v)
{
  if(v.n_elem <= 50)
  {
    return arma::median(v);
  }

  const arma::vec edges = arma::linspace<arma::vec>(0, v.max(), v.n_elem/10);
  const arma::vec centres = edges + (edges(1) - edges(0))*0.5;
  const arma::uvec counts = arma::histc(v, edges);

  // Weighting the counts by edge^3 favours bins at larger values.
  arma::uword best;
  const arma::vec weighted = counts % edges % edges % edges;
  (void)weighted.max(best);

  // Restrict to the edges around the weighted maximum and take the fullest bin.
  const double pivot = edges(best);
  const arma::uvec window = arma::find(( edges > (pivot - 0.05) ) % ( edges < (pivot + 0.25) ));
  const arma::uvec windowCounts = counts.elem(window);
  (void)windowCounts.max(best);

  return centres(window(best));
}

// Rescales vAprev and vBprev in place using the peak structure of bafprev.
//
//   bafprev  : per-sample values whose histogram is searched for peaks.
//   vAprev   : first channel; divided by nA on return.
//   vBprev   : second channel; divided by nA (fewer than two usable peaks) or
//              by nB (two peaks) on return.
//   elements : indices of the samples used to estimate nA / nB; the scaling
//              itself is applied to every sample.
//
// Fewer than two usable peaks (including none at all) is handled as a single
// cluster: both channels are divided by findnormal() of the selected vA.
// With two peaks the histogram minimum between them splits the samples, nA is
// the median of vA below the split and nB = nA * median(vB)/median(vA) above it.
void locus::refind(arma::vec bafprev, arma::vec & vAprev, arma::vec & vBprev, arma::uvec & elements)
{
  arma::uvec loc2;
  double nA, nB;
  
  arma::vec baf = bafprev.elem(elements);
  arma::vec vA = vAprev.elem(elements);
  arma::vec vB = vBprev.elem(elements);
  
  // Relative-frequency histogram of baf on 20 evenly spaced bin centres.
  arma::vec x  = arma::linspace<arma::vec>(baf.min(), baf.max(), 20);
  arma::uvec h = arma::hist(baf, x);
  arma::vec hd = arma::conv_to<arma::vec>::from(h);
  hd = hd/arma::sum(hd);
  arma::uvec loc;
  arma::vec pks;
  findpeaks(hd, 0.05, 2, 3, pks, loc);

  if(loc.n_elem > 1)
  {
    // Keep only the peaks lying more than 0.1 above the first one.
    arma::uvec i = arma::find( ( x.elem(loc.rows(1,loc.n_elem - 1)) - x(loc(0)) ) > 0.1 );
    if(i.n_elem > 0)
    {
      loc = arma::join_cols( loc.rows(0,0), loc.elem(i + 1) );
      // Among the remaining candidates take the tallest as second peak.
      arma::vec temp = hd.elem( loc.rows(1,loc.n_elem - 1) );
      arma::uword p;
      (void)temp.max(p);
      loc2 = arma::uvec(2);
      loc2(0) = loc(0);
      loc2(1) = loc(p+1);
    }
    else
    {
      loc2 = loc.rows(0,0);
    }
  }
  else
  {
    loc2 = loc;
  }
  
  // "< 2" rather than "== 1": when no peak was found loc2 is empty and the
  // two-peak branch would call min()/max() on an empty vector.
  if(loc2.n_elem < 2)
  {
    nA = findnormal(vA);
    vAprev = vAprev/nA;
    vBprev = vBprev/nA;
  }
  else
  {
    // Index (into x / hd) of the histogram minimum between the two peaks.
    arma::vec temp = hd.rows(loc2.min(), loc2.max());
    arma::uword L; // min(index) takes arma::uword&, which may be 64-bit
    (void)temp.min(L);
    L = L + loc2.min();
    nA = arma::median( vA.elem( arma::find(baf<x(L)) ) );
    arma::uvec utemp = arma::find(baf>x(L));
    nB = nA * arma::median(vB.elem(utemp))/arma::median(vA.elem(utemp));
    vAprev = vAprev/nA;
    vBprev = vBprev/nB;
  }
}

// Writes one line to fitxer: name, chromosome, position, QC score and one
// genotype per entry of rgenotypes. Entries not listed in filtre are -1.
void locus::escriu_genotype(ofstream & fitxer)
{
    rqc = gt.qc;

    // Scatter the computed genotypes back to their original sample slots.
    rgenotypes.fill(-1);
    const arma::uword kept = filtre.n_elem;
    for(arma::uword k = 0; k < kept; ++k)
    {
      const arma::uword slot = filtre(k);
      rgenotypes(slot) = gt.genotypes(k);
    }

    fitxer << nom << " " << chr << " " << posicio;
    fitxer << " " << rqc;
    for(arma::uword k = 0; k < rgenotypes.n_elem; ++k)
    {
      fitxer << " " << rgenotypes(k);
    }
    fitxer << std::endl;
}

// Writes one line to fitxer: the locus name followed by one genotype score per
// entry of rgSC. Entries not listed in filtre are -1.
void locus::escriu_genotype_sc(ofstream & fitxer)
{
    // Scatter the computed scores back to their original sample slots.
    rgSC.fill(-1);
    for(unsigned int it=0; it<filtre.n_elem; it++)
    {
      rgSC(filtre(it)) = gt.sc(it);
    }
    
    
    fitxer << nom;
    // Bound the loop by the vector being printed (rgSC), not by rgenotypes.
    for(unsigned int it=0; it<rgSC.n_elem; it++)
    {
      fitxer << " " << rgSC(it);
    }
    fitxer << std::endl;
}


// Writes one line to fitxer: name, chromosome, position, the pairs
// Q1(2)/Q2(2) and Q1(0)/Q2(0) from cnv, then one CNV score per entry of rSC.
// Entries not listed in filtre are -1.
void locus::escriu_cnvscores(ofstream & fitxer)
{
    // Scatter the computed scores back to their original sample slots.
    rSC.fill(-1);
    const arma::uword kept = filtre.n_elem;
    for(arma::uword k = 0; k < kept; ++k)
    {
      const arma::uword slot = filtre(k);
      rSC(slot) = cnv.SC(k);
    }

    fitxer << nom << " " << chr << " " << posicio;
    fitxer << " " << cnv.Q1(2) << "/" << cnv.Q2(2);
    fitxer << " " << cnv.Q1(0) << "/" << cnv.Q2(0);
    for(arma::uword k = 0; k < rSC.n_elem; ++k)
    {
      fitxer << " " << rSC(k);
    }
    fitxer << std::endl;
}