Source code for lfd.registration.solver

from __future__ import division

import numpy as np
import tps
import os.path
from joblib import Memory

import lfd.registration
if lfd.registration._has_cuda:
    import pycuda.gpuarray as gpuarray
    import pycuda.driver as drv
    import scikits.cuda.linalg as culinalg
    from lfd.tpsopt.culinalg_exts import gemm, geam

class TpsSolver(object):
    """Fits a thin plate spline to data using precomputed matrix products."""
    def __init__(self, N, QN, NKN, NRN, NR, x_nd, K_nn, rot_coef):
        self.rot_coef = rot_coef
        self.n, self.d = x_nd.shape
        self.N = N
        self.QN = QN
        self.NKN = NKN
        self.NRN = NRN
        self.NR = NR
        self.x_nd = x_nd
        self.K_nn = K_nn
    def solve(self, wt_n, y_nd, bend_coef, f_res):
        raise NotImplementedError
class TpsSolverFactory(object):
    def __init__(self, use_cache=True, cachedir=None):
        """Inits TpsSolverFactory.

        Args:
            use_cache: whether to cache solver matrices in a file.
            cachedir: cache directory. If not specified, the .cache directory
                in the parent directory of the top-level package is used.
        """
        if use_cache:
            if cachedir is None:
                # .cache directory in the parent directory of the top-level package
                cachedir = os.path.join(__import__(__name__.split('.')[0]).__path__[0], os.path.pardir, ".cache")
            memory = Memory(cachedir=cachedir, verbose=0)
            self.get_solver_mats = memory.cache(self.get_solver_mats)
    def get_solver_mats(self, x_nd, rot_coef):
        """Precomputes several of the matrix products needed to fit a TPS exactly.

        A TPS is fit by solving the system

            N'(Q'WQ + b K + R)N z = N'Q'W y + N'R
            theta = N z

        where K and R are padded with zeros appropriately.

        Returns:
            N, QN, N'KN, N'RN, N'R, K
        """
        raise NotImplementedError
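    # A derivation note for the system in the docstring above (a sketch, not
    # part of the original module): writing Q = [1 x K] and splitting
    # theta = (trans, lin, w), the weighted TPS objective is
    #
    #     (Q theta - y)' W (Q theta - y) + b w'Kw + (lin - I)' R (lin - I)
    #
    # subject to the constraint [1 x]'w = 0. Substituting theta = N z, where
    # the columns of N span the null space of the constraint matrix, and
    # setting the gradient with respect to z to zero gives
    #
    #     N'(Q'WQ + b K + R)N z = N'Q'W y + N'R
    #
    # (the N'R term comes from the (lin - I) penalty), which is exactly the
    # system solved by the subclasses below.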
    def get_solver(self, x_nd, rot_coef):
        raise NotImplementedError
class CpuTpsSolver(TpsSolver):
    def __init__(self, N, QN, NKN, NRN, NR, x_nd, K_nn, rot_coef):
        super(CpuTpsSolver, self).__init__(N, QN, NKN, NRN, NR, x_nd, K_nn, rot_coef)
    def solve(self, wt_n, y_nd, bend_coef, f_res):
        if y_nd.shape[0] != self.n or y_nd.shape[1] != self.d:
            raise RuntimeError("The dimensions of y_nd don't match the dimensions of x_nd")
        WQN = wt_n[:, None] * self.QN
        # normal equations N'(Q'WQ + b K + R)N z = N'Q'W y + N'R
        lhs = self.QN.T.dot(WQN) + bend_coef * self.NKN + self.NRN
        rhs = self.NR + WQN.T.dot(y_nd)
        z = np.linalg.solve(lhs, rhs)
        theta = self.N.dot(z)
        f_res.update(self.x_nd, y_nd, bend_coef, self.rot_coef, wt_n, theta, N=self.N, z=z)
class CpuTpsSolverFactory(TpsSolverFactory):
    def __init__(self, use_cache=True, cachedir=None):
        super(CpuTpsSolverFactory, self).__init__(use_cache=use_cache, cachedir=cachedir)
    def get_solver_mats(self, x_nd, rot_coef):
        n, d = x_nd.shape
        K_nn = tps.tps_kernel_matrix(x_nd)
        # A theta = 0 encodes the TPS constraints [1 x]'w = 0
        A = np.r_[np.zeros((d+1, d+1)), np.c_[np.ones((n, 1)), x_nd]].T
        n_cnts = A.shape[0]
        # the trailing left singular vectors span the null space of A
        _u, _s, _vh = np.linalg.svd(A.T)
        N = _u[:, n_cnts:]
        NR = N[1:1+d, :].T * rot_coef
        KN = K_nn.dot(N[1+d:, :])
        QN = np.c_[np.ones((n, 1)), x_nd].dot(N[:1+d, :]) + KN
        NKN = (N[1+d:, :].T).dot(KN)
        NRN = NR.dot(N[1:1+d, :])
        return N, QN, NKN, NRN, NR, K_nn
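    # A minimal sanity check of the null-space construction above (illustrative
    # only, not part of the original module; the shapes and rot_coef values are
    # hypothetical, and it assumes numpy and the tps module are importable):
    #
    #     x_nd = np.random.randn(20, 3)
    #     factory = CpuTpsSolverFactory(use_cache=False)
    #     N, QN, NKN, NRN, NR, K_nn = factory.get_solver_mats(x_nd, np.r_[1e-4, 1e-4, 1e-1])
    #     A = np.r_[np.zeros((4, 4)), np.c_[np.ones((20, 1)), x_nd]].T
    #     assert np.allclose(A.dot(N), 0)  # N spans the null space of the constraints
    #     assert np.allclose(QN, np.c_[np.ones((20, 1)), x_nd, K_nn].dot(N))  # QN = [1 x K] N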
    def get_solver(self, x_nd, rot_coef):
        N, QN, NKN, NRN, NR, K_nn = self.get_solver_mats(x_nd, rot_coef)
        return CpuTpsSolver(N, QN, NKN, NRN, NR, x_nd, K_nn, rot_coef)
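# A minimal end-to-end usage sketch for the CPU path (illustrative, not part of
# the original module). The f_res argument can be anything exposing the
# update(...) signature that solve calls; the FResult stub below is hypothetical:
#
#     class FResult(object):
#         def update(self, x_nd, y_nd, bend_coef, rot_coef, wt_n, theta, N=None, z=None):
#             self.theta = theta  # (1+d+n) x d: translation, affine part, then TPS weights
#
#     x_nd = np.random.randn(50, 3)
#     y_nd = x_nd + 0.01 * np.random.randn(50, 3)
#     solver = CpuTpsSolverFactory(use_cache=False).get_solver(x_nd, np.r_[1e-4, 1e-4, 1e-1])
#     f_res = FResult()
#     solver.solve(np.ones(50), y_nd, 0.1, f_res)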
class GpuTpsSolver(TpsSolver):
    def __init__(self, N, QN, NKN, NRN, NR, x_nd, K_nn, rot_coef):
        if not lfd.registration._has_cuda:
            raise NotImplementedError("CUDA not installed")
        super(GpuTpsSolver, self).__init__(N, QN, NKN, NRN, NR, x_nd, K_nn, rot_coef)
        # the GPU cho_solve requires matrices to be f-contiguous when the rhs is a matrix
        self.QN = self.QN.copy(order='F')
        self.sqrtWQN_gpu = gpuarray.empty(self.QN.shape, np.float64, order='F')
        self.NKN_gpu = gpuarray.to_gpu(self.NKN.copy(order='F'))
        self.NRN_gpu = gpuarray.to_gpu(self.NRN.copy(order='F'))
        self.lhs_gpu = gpuarray.empty(self.NKN_gpu.shape, np.float64, order='F')
        self.QN_gpu = gpuarray.to_gpu(self.QN)
        self.NR_gpu = gpuarray.to_gpu(self.NR.copy(order='F'))
        self.y_dnW_gpu = gpuarray.empty(self.x_nd.T.shape, np.float64, order='F')
        self.rhs_gpu = gpuarray.empty(self.NR_gpu.shape, np.float64, order='F')
        if lfd.registration._has_cula:
            self.N_gpu = gpuarray.to_gpu(self.N.copy(order='F'))
            self.theta_gpu = gpuarray.empty((1+self.d+self.n, self.d), np.float64, order='F')
        else:
            self.N_gpu = None
            self.theta_gpu = None
    def solve(self, wt_n, y_nd, bend_coef, f_res):
        if y_nd.shape[0] != self.n or y_nd.shape[1] != self.d:
            raise RuntimeError("The dimensions of y_nd don't match the dimensions of x_nd")
        if not y_nd.flags.c_contiguous:
            raise RuntimeError("Expected y_nd to be c-contiguous but it isn't")
        self.sqrtWQN_gpu.set_async(np.sqrt(wt_n)[:, None] * self.QN)
        geam(self.NKN_gpu, self.NRN_gpu, self.lhs_gpu, alpha=bend_coef, beta=1)
        gemm(self.sqrtWQN_gpu, self.sqrtWQN_gpu, self.lhs_gpu, transa='T', alpha=1, beta=1)
        drv.memcpy_dtod_async(self.rhs_gpu.gpudata, self.NR_gpu.gpudata, self.rhs_gpu.nbytes)
        self.y_dnW_gpu.set_async(y_nd.T * wt_n)  # use the transpose so that it is f-contiguous
        gemm(self.QN_gpu, self.y_dnW_gpu, self.rhs_gpu, transa='T', transb='T', alpha=1, beta=1)
        if lfd.registration._has_cula:
            culinalg.cho_solve(self.lhs_gpu, self.rhs_gpu)
            z = self.rhs_gpu.get()
            culinalg.dot(self.N_gpu, self.rhs_gpu, out=self.theta_gpu)
            theta = self.theta_gpu.get()
        else:
            # if CULA is not installed, perform the last two computations on the CPU
            z = np.linalg.solve(self.lhs_gpu.get(), self.rhs_gpu.get())
            theta = self.N.dot(z)
        f_res.update(self.x_nd, y_nd, bend_coef, self.rot_coef, wt_n, theta, N=self.N, z=z)
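    # A note on the solve above (not in the original source): since
    # Q'WQ = (W^(1/2) Q)'(W^(1/2) Q), a single gemm with transa='T' both forms
    # the Gram matrix and accumulates it (beta=1) into lhs_gpu on top of
    # bend_coef * NKN + NRN from the preceding geam, so no separate
    # elementwise weighting kernel is needed on the GPU.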
class GpuTpsSolverFactory(TpsSolverFactory):
    def __init__(self, use_cache=True, cachedir=None):
        if not lfd.registration._has_cuda:
            raise NotImplementedError("CUDA not installed")
        super(GpuTpsSolverFactory, self).__init__(use_cache=use_cache, cachedir=cachedir)
    def get_solver_mats(self, x_nd, rot_coef):
        n, d = x_nd.shape
        K_nn = tps.tps_kernel_matrix(x_nd)
        A = np.r_[np.zeros((d+1, d+1)), np.c_[np.ones((n, 1)), x_nd]].T
        n_cnts = A.shape[0]
        _u, _s, _vh = np.linalg.svd(A.T)
        N = _u[:, n_cnts:].copy()
        NR = (N[1:1+d, :].T * rot_coef).copy()  # copy so that it is c-contiguous
        # compute the kernel matrix products on the GPU
        N_gpu = gpuarray.to_gpu(N[1+d:, :])
        K_gpu = gpuarray.to_gpu(K_nn)
        KN_gpu = culinalg.dot(K_gpu, N_gpu)
        QN = np.c_[np.ones((n, 1)), x_nd].dot(N[:1+d, :]) + KN_gpu.get()
        NKN_gpu = culinalg.dot(N_gpu, KN_gpu, transa='T')
        NKN = NKN_gpu.get()
        NRN = NR.dot(N[1:1+d, :])
        return N, QN, NKN, NRN, NR, K_nn
    def get_solver(self, x_nd, rot_coef):
        N, QN, NKN, NRN, NR, K_nn = self.get_solver_mats(x_nd, rot_coef)
        return GpuTpsSolver(N, QN, NKN, NRN, NR, x_nd, K_nn, rot_coef)
class AutoTpsSolverFactory(TpsSolverFactory):
    def __new__(cls, *args, **kwargs):
        from lfd.registration import _has_cuda
        # Construct the concrete factory directly: when __new__ returns an
        # instance of a sibling class, Python does not call that class's
        # __init__, so object.__new__(GpuTpsSolverFactory, ...) would leave
        # the instance uninitialized (and object.__new__ ignores extra args).
        if _has_cuda:
            return GpuTpsSolverFactory(*args, **kwargs)
        else:
            return CpuTpsSolverFactory(*args, **kwargs)
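# A usage sketch for the automatic backend dispatch (illustrative):
# AutoTpsSolverFactory yields a GpuTpsSolverFactory when CUDA is available and
# a CpuTpsSolverFactory otherwise, so calling code stays backend-agnostic
# (x_nd, wt_n, y_nd, bend_coef, and f_res as in the CPU sketch above):
#
#     factory = AutoTpsSolverFactory(use_cache=False)
#     solver = factory.get_solver(x_nd, np.r_[1e-4, 1e-4, 1e-1])
#     solver.solve(wt_n, y_nd, bend_coef, f_res)  # same interface on either backend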