Friday 31 July 2009

read and write data


   1:  using System;
   2:  using System.Collections.Generic;
   3:  using System.IO;
   4:  using System.Text;
   5:  using System.Windows.Forms;
   6:  namespace ReadData
   7:  {
   8:      public partial class Form1 : Form
   9:      {
  10:          static readonly StringBuilder outData = new StringBuilder();
  11:          readonly List<double> a = new List<double>(); // the 2 input TSs
  12:          readonly List<double> b = new List<double>();
  13:          public Form1()
  14:          {
  15:              InitializeComponent();
  16:          }
  17:          void Form1_Load(object sender, EventArgs e)
  18:          {
  19:              LoadData();
  20:              int totalFound = 0;
  21:              double lastDiff = 0;
  22:              bool haveEntry = false;
  23:              const int window = 100; // how many values we use to estimate mean and stdDev
  24:              for (int i = window; i <= a.Count; i++)
  25:              //for (int i = 144; i < 145; i++) // one sample only, for testing
  26:              {
  27:                  double meanA = GetMean(a, i - window, i);
  28:                  double meanB = GetMean(b, i - window, i);
  29:                  double stdA = GetStd(a, i - window, i, meanA);
  30:                  double stdB = GetStd(b, i - window, i, meanB);
  31:                  double aNorm = (a[i - 1] - meanA) / stdA;
  32:                  double bNorm = (b[i - 1] - meanB) / stdB;
  33:                  double diff = aNorm - bNorm;
  34:                  double absDiff = Math.Abs(diff);
  35:                  if (haveEntry &amp;&amp; ((lastDiff > 1 &amp;&amp; diff <> -1)))
  36:                  {
  37:                      Console.WriteLine("==== Exit index " + i);
  38:                      haveEntry = false;
  39:                  }
  40:                  //outData.AppendLine(i + ", " + diff); // for writing to file
  41:                  // outData.AppendLine(diff.ToString()); // for writing to file
  42:                  outData.AppendLine(string.Format("{0:d4}", i-window+1) + " " + diff); // for writing to file
  43:                  if (absDiff > 15)
  44:                  {
  45:                      totalFound++;
  46:                      Console.WriteLine("Found at index " + i + " diff " + diff);
  47:                      if (absDiff < Math.Abs(lastDiff) &amp;&amp; !haveEntry)
  48:                      {
  49:                          Console.WriteLine("==== Entry index " + i);
  50:                          haveEntry = true;
  51:                      }
  52:                  }
  53:                  lastDiff = diff;
  54:              }
  55:              Console.WriteLine("total found " + totalFound);
  56:              WriteData();
  57:          }
  58:          static void WriteData()
  59:          {
  60:              const string fileout = "out.csv";
  61:              if (File.Exists(fileout))
  62:              {
  63:                  File.Delete(fileout);
  64:              }
  65:              using (StreamWriter sw = new StreamWriter(fileout))
  66:              {
  67:                  sw.Write(outData.ToString());
  68:              }
  69:          }
  70:          static double GetStd(IList<double> array, int start, int limit, double mean)
  71:          {
  72:              double sum = 0;
  73:              for (int i = start; i < limit; i++)
  74:              {
  75:                  sum += (array[i] - mean) * (array[i] - mean);
  76:              }
  77:              sum = Math.Sqrt(sum);
  78:              sum /= (limit - start);
  79:              return sum;
  80:          }
  81:          static double GetMean(IList<double> array, int start, int limit)
  82:          {
  83:              double sum = 0;
  84:              for (int i = start; i < limit; i++)
  85:              {
  86:                  sum += array[i];
  87:              }
  88:              sum /= (limit - start);
  89:              return sum;
  90:          }
  91:          /// <summary>
  92:          /// Assume input file has 2 columns, one for each TS we want to track.
  93:          /// </summary>
  94:          void LoadData()
  95:          {
  96:              using (StreamReader sr = new StreamReader("5m-db.csv"))
  97:              {
  98:                  string newData;
  99:                  a.Clear();
 100:                  b.Clear();
 101:                  while (sr.Peek() != -1)
 102:                  {
 103:                      newData = sr.ReadLine();
 104:                      string[] line = newData.Split(',');
 105:                      a.Add(double.Parse(line[0]));
 106:                      b.Add(double.Parse(line[1]));
 107:                  }
 108:                  Console.WriteLine("Loaded " + a.Count + " data points.");
 109:              }
 110:          }
 111:      }

free drive

http://www.freedrive.com/folder/23026

Wednesday 29 July 2009

R stuff for normal distribution fitting

Look at this site for more: http://www.bigre.ulb.ac.be/Users/jvanheld/statistics_bioinformatics/practicals/microarray_fitting_solutions.html


   1:   
   2:  # data loading
   3:  filepath <- system.file("data", "morley.tab" , package="datasets")
   4:  mm <- read.table(filepath)
   5:  m <- mm[,1]
   6:   
   7:  hist(m)
   8:   
   9:  ## install
  10:  library(fBasics)
  11:   
  12:  skewness(m)
  13:   
  14:  kurtosis(m)
  15:   
  16:  plot(density(m))
  17:   
  18:  plot(ecdf(m))
  19:   
  20:  qqnorm(m)
  21:  abline(0,1)
  22:   
  23:  gal <- m
  24:   
  25:  ## Calculate estimators
  26:  m <- mean(gal,na.rm=T)
  27:  s <- sd(gal,na.rm=T)
  28:   
  29:  ## Draw the density histogram of the galactose microarray values
  30:  h <- hist(gal,breaks=100,col='#CCCCFF',border='#CCCCFF',freq=F)
  31:   
  32:  ## On the histogram, draw vertical bars at the following values :
  33:  ## mean, mean + 1*sd, mean -1*sd, mean +2*sd, mean -2*sd
  34:  abline(v=c(m,m-s,m-2*s,m+s,m+2*s),col="#000088",lwd=1)
  35:   
  36:  ## Superimpose the theoretical distribution
  37:  lines(h$mids,dnorm(h$mids,m,s), type="l", lwd=2,col="red")

FITTING DISTRIBUTIONS WITH R

http://www.google.co.uk/search?hl=en&q=r+normal+distribution+fit&meta=

Thursday 23 July 2009

Very nice SQL articles and some tools

http://www.sommarskog.se/

SQL List indexes in db with fragmentation > 30 %


   1:  SELECT 
   2:      OBJECT_NAME(object_id) ObjectName,
   3:      index_id,
   4:      index_type_desc,
   5:      avg_fragmentation_in_percent
   6:  FROM sys.dm_db_index_physical_stats
   7:      (DB_ID('AdventureWorks'),NULL, NULL, NULL, 'LIMITED')
   8:  WHERE 
   9:      avg_fragmentation_in_percent > 30
  10:  ORDER BY 
  11:      OBJECT_NAME(object_id)

Tuesday 21 July 2009

Monday 20 July 2009

Friday 10 July 2009

Mobile UK directory - remove

You might want to send this on to your friends in the UK. Maybe you have heard about this, but early next week all UK mobiles will be on a directory, which will mean that anyone will be able to access the numbers. It is easy to unsubscribe, but it must be done before the beginning of next week to make sure that you are ex-directory. You could be swamped by unsolicited messages and calls. Removal is recommended by the BBC - see link below. http://news.bbc.co.uk/1/hi/programmes/working_lunch/8091621.stm

Just done this myself. Double-checked it wasn't a scam, and it is actually on the BBC website recommending doing this. The Directory of Mobile Phone numbers goes live next week. Apparently, all numbers, including those belonging to children, will be open to cold calling and the general abuse that less scrupulous telesales people subject us to. To remove your number go here (you need your mobile phone with you to do this; they text you a code): http://www.118800.co.uk/removeme/remove-me.html

When on the site, click "Home" then "Ex-directory"; this will remove you from the directory. You can remove your number from this list, and tell all your friends - especially those with children who have mobile phones. A mobile number is private and you should be able to choose who you give it to - none of us agreed to this when we signed our mobile phone contracts.

Friday 3 July 2009

SQL start end time timing


   1:  declare @starttime datetime
   2:  set @starttime = getdate()
   3:   
   4:  --SQL
   5:   
   6:  print ' ... substring took ' + cast(datediff(ms,@starttime,getdate()) as varchar) + 'ms'
   7:  set @starttime = getdate()

Thursday 2 July 2009

Excel data reader library in codeplex

After doing quite a bit of research, this is the best and simplest excel data reader I managed to find for the .NET environment:

http://exceldatareader.codeplex.com/

Wednesday 1 July 2009

SQL output parameters in stored procedures

In C# add parameter of type output and obtain its value after cmd execution:


   1:  cmd.Parameters.Add("@newId", SqlDbType.Int);
   2:  cmd.Parameters["@newId"].Direction = ParameterDirection.Output;
   3:  cmd.ExecuteNonQuery();
   4:  int result = (int)cmd.Parameters["@newId"].Value;
   5:  return result;

In sproc, declare the parameter as type output:


   1:  Create procedure... (
   2:  @param1 int, etc
   3:  @newId int OUTPUT
   4:  )
   5:  begin
   6:  ...
   7:  set @newId = 123
   8:  return
   9:  end