-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
276 additions
and
423 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
#!/usr/bin/env python | ||
""" A Monte Carlo to Hessian conversion tool """ | ||
|
||
__author__ = 'Stefano Carrazza' | ||
__license__ = 'GPL' | ||
__version__ = '1.0.0' | ||
__email__ = '[email protected]' | ||
|
||
|
||
import numpy | ||
import lhapdf | ||
from numba import jit | ||
|
||
class LocalPDF: | ||
""" A simple class for PDF manipulation """ | ||
def __init__(self, pdf_name, nrep, xgrid, fl, Q, eps): | ||
self.pdf_name = pdf_name | ||
self.pdf = lhapdf.mkPDFs(pdf_name) | ||
self.n = nrep | ||
self.n_rep = len(self.pdf)-1 | ||
self.fl = fl | ||
self.xgrid = xgrid | ||
self.Q = Q | ||
self.xfxQ = numpy.zeros(shape=(self.n_rep, fl.n, xgrid.n)) | ||
self.base = numpy.zeros(shape=self.n_rep, dtype=numpy.int64) | ||
self.fin = numpy.zeros(shape=self.n, dtype=numpy.int64) | ||
|
||
for i in range(self.n_rep): self.base[i] = i+1 | ||
for i in range(self.n): self.fin[i] = i+1 | ||
|
||
# precomputing values | ||
for r in range(self.n_rep): | ||
for f in range(fl.n): | ||
for ix in range(xgrid.n): | ||
self.xfxQ[r, f, ix] = self.pdf[self.base[r]].xfxQ(fl.id[f], xgrid.x[ix], Q) | ||
|
||
# precomputing averages | ||
self.f0 = numpy.zeros(shape=(fl.n, xgrid.n)) | ||
for f in range(fl.n): | ||
for ix in range(xgrid.n): | ||
self.f0[f, ix] = self.pdf[0].xfxQ(fl.id[f], xgrid.x[ix], Q) | ||
|
||
# compute std dev | ||
self.std = numpy.std(self.xfxQ, axis=0, ddof=1) | ||
|
||
# lower, upper 68cl | ||
self.std68 = numpy.zeros(shape=(fl.n, xgrid.n)) | ||
for f in range(fl.n): | ||
low, up = get_limits(self.xfxQ[:,f,:], self.f0[f,:]) | ||
self.std68[f] = (up-low)/2.0 | ||
|
||
# maximum difference between std vs 68cl. -> create pandas array | ||
self.mask = numpy.array([ abs(1 - self.std[f,:]/self.std68[f,:]) <= eps for f in range(fl.n)]) | ||
print " [Info] Keeping ", numpy.count_nonzero(self.mask), "nf*nx using (1-std/68cl) <= eps =", eps | ||
|
||
@jit | ||
def fill_cov(self, nf, nx, xfxQ, f0, mask): | ||
n = len(xfxQ) | ||
cov = numpy.zeros(shape=(nf*nx,nf*nx)) | ||
for fi in range(nf): | ||
for fj in range(nf): | ||
for ix in range(nx): | ||
for jx in range(nx): | ||
if mask[fi, ix] and mask[fj, jx]: | ||
i = nx*fi+ix | ||
j = nx*fj+jx | ||
for r in range(n): | ||
cov[i, j] += (xfxQ[r, fi, ix] - f0[fi,ix])*(xfxQ[r, fj, jx] - f0[fj,jx]) | ||
return cov/(n-1.0) | ||
|
||
def pdfcovmat(self): | ||
""" Build PDF covariance matrices """ | ||
cov = self.fill_cov(self.fl.n, self.xgrid.n, self.xfxQ, self.f0, self.mask) | ||
return cov | ||
|
||
@jit | ||
def rebase(self, basis): | ||
fin = numpy.sort(basis) | ||
ind = 0 | ||
negative = numpy.zeros(shape=(self.n_rep-self.n), dtype=numpy.int64) | ||
for i in range(self.n_rep): | ||
it = False | ||
for j in fin: | ||
if j == i+1: it = True | ||
if it == False: | ||
negative[ind] = i+1 | ||
ind+=1 | ||
self.fin = fin | ||
self.base = numpy.append(fin, negative) | ||
|
||
# precomputing values | ||
for r in range(self.n_rep): | ||
for f in range(self.fl.n): | ||
for ix in range(self.xgrid.n): | ||
self.xfxQ[r, f, ix] = self.pdf[self.base[r]].xfxQ(self.fl.id[f], self.xgrid.x[ix], self.Q) | ||
|
||
def get_limits(xfxQ, f0): | ||
reps,l = xfxQ.shape | ||
d = numpy.abs(xfxQ - f0) | ||
ind = numpy.argsort(d, axis=0) | ||
ind68 = 68*reps//100 | ||
sr = xfxQ[ind,numpy.arange(0,l)][:ind68,:] | ||
up1s = numpy.max(sr,axis=0) | ||
low1s = numpy.min(sr,axis=0) | ||
return low1s, up1s | ||
|
||
class XGrid: | ||
""" The x grid points used by the test """ | ||
def __init__(self, xminlog=1e-5, xminlin=1e-1, nplog=25, nplin=25): | ||
self.x = numpy.append(numpy.logspace(numpy.log10(xminlog), numpy.log10(xminlin), num=nplog, endpoint=False), | ||
numpy.linspace(xminlin, 0.9, num=nplin, endpoint=False)) | ||
self.n = len(self.x) | ||
|
||
class Flavors: | ||
""" The flavor container """ | ||
def __init__(self, nf=3): | ||
self.id = numpy.arange(-nf,nf+1) | ||
self.n = len(self.id) | ||
|
||
def invcov_sqrtinvcov(cov): | ||
val, vec = numpy.linalg.eigh(cov) | ||
invval = numpy.zeros(shape=len(val)) | ||
sqrtval = numpy.zeros(shape=len(val)) | ||
for i in range(len(val)): | ||
if val[i] > 1e-12: | ||
invval[i] = 1.0/val[i] | ||
sqrtval[i] = 1.0/val[i]**0.5 | ||
else: | ||
print " [Warning] Removing eigenvalue", i, val[i] | ||
|
||
invcov = numpy.dot(vec, numpy.diag(invval)).dot(vec.T) | ||
sqrtinvcov = numpy.dot(vec, numpy.diag(sqrtval)).dot(vec.T) | ||
return invcov, sqrtinvcov |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Created on Tue Apr 21 22:00:01 2015 | ||
@author: zah | ||
""" | ||
|
||
import os | ||
import sys | ||
import shutil | ||
import lhapdf | ||
import numpy as np | ||
|
||
import pandas as pd | ||
|
||
def split_sep(f): | ||
for line in f: | ||
if line.startswith(b'---'): | ||
break | ||
yield line | ||
|
||
def read_xqf_from_file(f): | ||
|
||
lines = split_sep(f) | ||
try: | ||
(xtext, qtext, ftext) = [next(lines) for _ in range(3)] | ||
except StopIteration: | ||
return None | ||
xvals = np.fromstring(xtext, sep = " ") | ||
qvals = np.fromstring(qtext, sep = " ") | ||
fvals = np.fromstring(ftext, sep = " ", dtype=np.int) | ||
vals = np.fromstring(b''.join(lines), sep= " ") | ||
return pd.Series(vals, index = pd.MultiIndex.from_product((xvals, qvals, fvals))) | ||
|
||
|
||
def read_xqf_from_lhapdf(pdf, rep0grids): | ||
indexes = tuple(rep0grids.index) | ||
vals = [] | ||
for x in indexes: | ||
vals += [pdf.xfxQ(x[3],x[1],x[2])] | ||
return pd.Series(vals, index = rep0grids.index) | ||
|
||
def read_all_xqf(f): | ||
while True: | ||
result = read_xqf_from_file(f) | ||
if result is None: | ||
return | ||
yield result | ||
|
||
#TODO: Make pdf_name the pdf_name instead of path | ||
def load_replica_2(rep, pdf_name, pdf=None, rep0grids=None): | ||
|
||
sys.stdout.write("-> Reading replica from LHAPDF %d \r" % rep) | ||
sys.stdout.flush() | ||
|
||
suffix = str(rep).zfill(4) | ||
with open(pdf_name + "_" + suffix + ".dat", 'rb') as inn: | ||
header = b"".join(split_sep(inn)) | ||
|
||
if rep0grids is not None: | ||
xfqs = read_xqf_from_lhapdf(pdf, rep0grids) | ||
else: | ||
xfqs = list(read_all_xqf(inn)) | ||
xfqs = pd.concat(xfqs, keys=range(len(xfqs))) | ||
return header, xfqs | ||
|
||
#Split this to debug easily | ||
def _rep_to_buffer(out, header, subgrids): | ||
sep = b'---' | ||
out.write(header) | ||
out.write(sep) | ||
for _,g in subgrids.groupby(level=0): | ||
out.write(b'\n') | ||
ind = g.index.get_level_values(1).unique() | ||
np.savetxt(out, ind, fmt='%.7E',delimiter=' ', newline=' ') | ||
out.write(b'\n') | ||
ind = g.index.get_level_values(2).unique() | ||
np.savetxt(out, ind, fmt='%.7E',delimiter=' ', newline=' ') | ||
out.write(b'\n') | ||
#Integer format | ||
ind = g.index.get_level_values(3).unique() | ||
np.savetxt(out, ind, delimiter=' ', fmt="%d", | ||
newline=' ') | ||
out.write(b'\n ') | ||
#Reshape so printing is easy | ||
reshaped = g.reshape((len(g.groupby(level=1))*len(g.groupby(level=2)), | ||
len(g.groupby(level=3)))) | ||
np.savetxt(out, reshaped, delimiter=" ", newline="\n ", fmt='%14.7E') | ||
out.write(sep) | ||
|
||
def write_replica(rep, pdf_name, header, subgrids): | ||
suffix = str(rep).zfill(4) | ||
with open(pdf_name + "_" + suffix + ".dat", 'wb') as out: | ||
_rep_to_buffer(out, header, subgrids) | ||
|
||
def load_all_replicas(pdf, pdf_name): | ||
rep0headers, rep0grids = load_replica_2(0,pdf_name) | ||
headers, grids = zip(*[load_replica_2(rep, pdf_name, pdf.pdf[pdf.base[rep]], rep0grids) for rep in range(pdf.n)]) | ||
return [rep0headers] + list(headers), [rep0grids] + list(grids) | ||
|
||
def big_matrix(gridlist): | ||
central_value = gridlist[0] | ||
X = pd.concat(gridlist[1:], axis=1, | ||
keys=range(1,len(gridlist)+1), #avoid confusion with rep0 | ||
).subtract(central_value, axis=0) | ||
if np.any(X.isnull()) or X.shape[0] != len(central_value): | ||
raise ValueError("Incompatible grid specifications") | ||
return X | ||
|
||
def hessian_from_lincomb(pdf, V): | ||
|
||
# preparing output folder | ||
neig = V.shape[1] | ||
|
||
base = lhapdf.paths()[0] + "/" + pdf.pdf_name + "/" + pdf.pdf_name | ||
file = pdf.pdf_name + "_hessian_" + str(neig) | ||
if not os.path.exists(file): os.makedirs(file) | ||
|
||
# copy replica 0 | ||
shutil.copy(base + "_0000.dat", file + "/" + file + "_0000.dat") | ||
|
||
inn = open(base + ".info", 'rb') | ||
out = open(file + "/" + file + ".info", 'wb') | ||
for l in inn.readlines(): | ||
if l.find("SetDesc:") >= 0: out.write("SetDesc: \"Hessian " + pdf.pdf_name + "_hessian\"\n") | ||
elif l.find("NumMembers:") >= 0: out.write("NumMembers: " + str(neig+1) + "\n") | ||
elif l.find("ErrorType: replicas") >= 0: out.write("ErrorType: symmhessian\n") | ||
else: out.write(l) | ||
inn.close() | ||
out.close() | ||
|
||
headers, grids = load_all_replicas(pdf, base) | ||
hess_name = file + '/' + file | ||
result = (big_matrix(grids).dot(V)).add(grids[0], axis=0, ) | ||
hess_header = b"PdfType: error\nFormat: lhagrid1\n" | ||
for column in result.columns: | ||
write_replica(column + 1, hess_name, hess_header, result[column]) |
Oops, something went wrong.