Python:Simulating DNA Melting
Hi! I work, even though there's no writeup!
For now, you can copy and run the Python script, and follow along with the explanatory writeup from the Matlab version while it runs. In addition to Python, you will need to have installed the packages numpy, scipy, and matplotlib. (These packages are already installed on the lab computers.)
- Test on 309 lab computers and Athena.
- Works on 309 lab computers. Has the same issue of plots appearing sequentially and the script not proceeding until the plot window is dismissed. Should I write a version that waits until the end and plots everything all at once?
This code models forced heating followed by either forced or passive cooling. It differs slightly from the Matlab writeup, which was back when forced heating and cooling didn't exist and there was only passive cooling.
Also, now it optionally plots the melting curve (in addition to fluorescence signal vs. time) at the end.
# Simulating DNA Melting in Python # 20.309 # Translated from the Matlab by KAD, 4 Apr 2011 # v1.0 works the same as the Matlab # v1.1 including forced heating and option to plot melting curve at end # v1.2 including option for forced cooling instead of passive cooling # v1.3 changed format of output to match actual LabView VI output # (was wrong before) import numpy import scipy.io as sio import matplotlib.pyplot as plt def quickplot(xvector, yvector, title, xlabel, ylabel): """ Wrapper function for making a simple plot using matplotlib.pyplot """ plt.plot(xvector, yvector) plt.title(title) plt.xlabel(xlabel) plt.ylabel(ylabel) plt.show() class Experiment: """ Empty class definition -- works like a struct in C or Matlab. """ pass def DnaFraction(Ct, T, DeltaS, DeltaH, R=1.987): """ Returns the fraction of dsDNA in a solution containing equal concentrations of two complementary ssDNA oligos, as a function of total [DNA], temperature, entropy of annealing, and enthalpy of annealing. Default units are mole, calorie, kelvin. For other unit systems, supply the appropriate value for the optional gas constant parameter. T can be a single value or a numpy.array of values. """ # Compute Ct * Keq CtKeq = Ct * numpy.exp(DeltaS/R - DeltaH/(R*T)) # Compute f f = (1 + CtKeq - numpy.sqrt(1 + 2*CtKeq)) / CtKeq return f def middles(arr): """ Returns a new numpy array, 1 element shorter than the input array, whose values come from averaging each two adjacent values in the input. """ result =  for i in range(0, len(arr) - 1): element = (arr[i] + arr[i+1]) / 2.0 result.append(element) return numpy.array(result) def diffedCurve(expt, plot=True): """ Takes an Experiment and calculates a differentiated melting curve. Optionally plots it. """ expt.dTemp = middles(expt.temp) expt.diffDsDnaFraction = numpy.diff(expt.dsDnaFraction) if plot: quickplot(expt.dTemp, -1 * expt.diffDsDnaFraction, "Inverted Derivative of Melting Curve", "Temperature (K)", "Delta dsDNA Fraction") def makeIdeal(plot=True): """ Returns an ideal/theoretical DNA melting experiment. Optionally plots it. """ ideal = Experiment() ideal.temp = numpy.array(range(0, 100)) + 273.15 ideal.deltaS = -184 ideal.deltaH = -71E3 ideal.concentration = 1E-6 ideal.dsDnaFraction = DnaFraction(ideal.concentration, ideal.temp, ideal.deltaS, ideal.deltaH) if plot: quickplot(ideal.temp, ideal.dsDnaFraction, "Ideal DNA Melting Curve", "Temperature (K)", "dsDNA Fraction") return ideal def makeSimulated(forcedCooling=False): """ Returns a simulated experimental run, from which we'll produce realistic noisy data. """ sim = Experiment() sim.concentration = 1e-6 sim.deltaS = -184 sim.deltaH = -71e3 sim.hotTemp = 363.0 #90C sim.coolTemp = 293.0 #20C sim.sampleRate = 1.0 #samples per second sim.forcedCooling = forcedCooling sim.heatingRate = 0.5 #degrees per second sim.heatingDuration = (sim.hotTemp - sim.coolTemp) / sim.heatingRate if sim.forcedCooling: sim.coolingRate = -0.5 #degrees per second sim.coolingDuration = (sim.coolTemp - sim.hotTemp) / sim.coolingRate else: sim.coolingDuration = 900.0 #15 min to cool (would really take a bit longer) # create time vectors (seconds) sim.heatingTime = numpy.array(numpy.arange(0.0, sim.heatingDuration, 1/sim.sampleRate)) sim.coolingTime = numpy.array(numpy.arange(0.0, sim.coolingDuration, 1/sim.sampleRate)) sim.time = numpy.array(numpy.arange(0.0, sim.heatingDuration + sim.coolingDuration, 1/sim.sampleRate)) return sim def simTemp(sim, plot=True): """ Accepts a simulated experiment and calculates its temperature over time, simulating forced heating and forced passive cooling """ # Heating! # T(t) = rate * time heating = sim.heatingRate * sim.heatingTime + sim.coolTemp # Forced cooling! # T(t) = rate * time if sim.forcedCooling: cooling = sim.coolingRate * sim.coolingTime + sim.hotTemp # Passive cooling! # T(t) = (Tf-Ti)e^-(t/tau) + Tf else: coolingConst = 180.0 #tau = 180s = 3min cooling = ((sim.hotTemp - sim.coolTemp) * numpy.exp(-sim.coolingTime / coolingConst)) + sim.coolTemp # Combine heating & cooling sim.temp = numpy.concatenate((heating, cooling)) if plot: quickplot(sim.time, sim.temp, "Simulated Temperature vs. Time", "Time (sec)", "Temperature (K)") def simRTD(sim, addNoise=True, plot=True): """ Simulates RTD voltage (proportional to temp) vs. time, either with or without noise. """ try: # R_RTD = 1000 + 3.85(T-273) (ohms) sim.rRTD = 1000 + 3.85 * (sim.temp - 273) except AttributeError: simTemp(sim, plot=False) # R_RTD = 1000 + 3.85(T-273) (ohms) sim.rRTD = 1000 + 3.85 * (sim.temp - 273) # RTD is in a voltage divider. # V across RTD = total voltage * (RTD / (other+RTD)). vPower = 15 #15V rOther = 20e3 #20kohms sim.vRTD = vPower * (sim.rRTD / (rOther + sim.rRTD)) if addNoise: #gaussian noise with stdev=1mV noise = numpy.random.normal(scale=0.001, size=len(sim.vRTD)) sim.vRTD += noise if plot: quickplot(sim.time, sim.vRTD, "Simulated RTD Voltage vs. Time", "Time (sec)", "RTD Voltage (V)") def simFluorescenceSignal(sim, gaussianNoise=True, shotNoise=True, photobleaching=True, plotFluor=True, plotSignal=True): """ Calculates dsDNA fraction vs. time, scales to obtain fluorescence signal, and optionally adds noise and photobleaching. """ title = "Fluorescence Signal Voltage vs. Time" sim.dsDnaFraction = DnaFraction(sim.concentration, sim.temp, sim.deltaS, sim.deltaH) # OK something is wrong with how this is implemented because when you # combine heating and photobleaching shit gets weirdly negative #simulate photobleaching of signal if photobleaching: #Set bleaching const such that 1/2 of fluorphores fail during experiment BleachingConstant = 0.5**(1.0/900) #compute difference sim.diffDsDnaFraction = numpy.diff(sim.dsDnaFraction) #initialize empty array sim.dsDnaFractionBleached = numpy.empty(len(sim.dsDnaFraction)) #set initial condition for difference eqn sim.dsDnaFractionBleached = 0.995 #For loop to compute difference equation for i in range(1, len(sim.dsDnaFraction)): sim.dsDnaFractionBleached[i] = BleachingConstant * sim.dsDnaFractionBleached[i-1] + sim.diffDsDnaFraction[i-1] title += "\n(+ Photobleaching)" #Moving variable names around to make the other code happy sim.dsDnaFractionUnbleached = sim.dsDnaFraction sim.dsDnaFraction = sim.dsDnaFractionBleached if plotFluor: quickplot(sim.time, sim.dsDnaFraction, "Relative Fluorescence vs. Time", "Time (sec)", "Relative Fluorescence (AU)") #Scale curve to simulate voltage output of photodiode / amplifier. #Amplifier range between 5 and 9 volts output ampRange = numpy.random.uniform(low=5.0, high=9.0) #Minimum between +/- 1 ampMin = numpy.random.uniform(low=-1.0, high=1.0) #Scale output sim.fluorSignal = ampRange * sim.dsDnaFraction + ampMin #gaussian random noise if gaussianNoise: #gaussian noise with stdev=10mV noise = numpy.random.normal(scale=0.01, size=len(sim.fluorSignal)) sim.fluorSignal += noise title += " (+ Gaussian Noise)" #shot (impulse) noise if shotNoise: # Technically this should be poisson-distributed, but generating # poisson noise is actually not trivial. An acceptable substitute is # to make every sample have some probability P(spike) of being a spike # and probability 1-P(spike) of being the actual fluorescence sample. # (See the book "Numerical recipes in $language".) #Uniformly distributed random numbers noise = numpy.random.rand(len(sim.fluorSignal)) #Probability of spike on any given sample spikeProbability = 1.0/75 #Turn list of probabilities into list of zeros and 10V spikes noise = map(lambda i: 10 if i<spikeProbability else 0, noise) #Add in spikes sim.fluorSignal = [max(s, n) for s, n in zip(sim.fluorSignal, noise)] title += " (+ Shot Noise)" if plotSignal: quickplot(sim.time, sim.fluorSignal, title, "Time (sec)", "Fluorescence Signal Voltage (V)") def writeOut(expt, filename, plot=True): """ Writes out simulated data in the same format as the LabView VI does. I.e. each line is: RTD voltage, tab, fluorescence voltage, tab, some number whose significance I don't understand. (I'm including that mysterious number here, even though it doesn't matter for Matlab's file I/O, because Python's file I/O cares how many fields there are in a line, the way I've written it. """ rtd = expt.vRTD fluor = expt.fluorSignal f = open(filename, 'w') for (i, j) in zip(rtd, fluor): f.write("%f\t%f\t%f\n" % (i, j, 1)) f.close() if plot: quickplot(rtd, fluor, "Melting Curve of Simulated Data", "RTD Voltage", "Fluorescence Signal") # If you run this script, plots will appear one after the other, # following the sequence of the write-up. # If you encounter a bug with the script drawing multiple plots in one run # (may happen on Mac OSX), change plot=True to plot=False as desired to plot # only one thing at a time. # Change other parameters as desired to add or remove noise. # ideal curve ideal = makeIdeal(plot=True) diffedCurve(ideal, plot=True) # simulated realistic data sim = makeSimulated(forcedCooling=True) simTemp(sim, plot=True) simRTD(sim, addNoise=True, plot=True) simFluorescenceSignal(sim, gaussianNoise=True, shotNoise=True, photobleaching=True, plotFluor=True, plotSignal=True) #write out file of sim data writeOut(sim, 'simdata.txt', plot=True)