

Python Trajectory.enum_chunks_from_lhdf Method Code Examples

This article collects typical usage examples of the Python method msmbuilder.Trajectory.enum_chunks_from_lhdf. If you are wondering what Trajectory.enum_chunks_from_lhdf does, how to call it, or want to see it in context, the curated code examples below may help. You can also explore further usage examples of msmbuilder.Trajectory.


Eight code examples of the Trajectory.enum_chunks_from_lhdf method are shown below, sorted by popularity by default.
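
As a warm-up, here is a minimal sketch (not one of the collected examples) of the pattern all eight examples share: iterating over a large trajectory in fixed-size chunks instead of loading the whole file into memory at once. The file name 'traj.lh5' is a placeholder, and the keyword arguments shown (ChunkSize, AtomIndices, Stride) are the ones that appear throughout the examples below.

# Minimal sketch, assuming an msmbuilder 2.x installation and a
# trajectory file 'traj.lh5' (placeholder name).
from msmbuilder import Trajectory

n_frames = 0
for chunk in Trajectory.enum_chunks_from_lhdf('traj.lh5', ChunkSize=10000):
    # each chunk behaves like a Trajectory whose coordinates live in 'XYZList'
    n_frames += len(chunk)
print "total frames:", n_frames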

Example 1: run

# Required import: from msmbuilder import Trajectory [as alias]
# Or: from msmbuilder.Trajectory import enum_chunks_from_lhdf [as alias]
def run(project, atom_indices=None, traj_fn='all'):

    n_atoms = project.load_conf()['XYZList'].shape[1]

    if traj_fn.lower() == 'all':

        SASA = np.ones((project.n_trajs, np.max(project.traj_lengths), n_atoms)) * -1

        for traj_ind in xrange(project.n_trajs):
            traj_asa = []
            logger.info("Working on Trajectory %d", traj_ind)
            traj_fn = project.traj_filename(traj_ind)
            for traj_chunk in Trajectory.enum_chunks_from_lhdf(traj_fn, AtomIndices=atom_indices):
                traj_asa.extend(asa.calculate_asa(traj_chunk, n_sphere_points=24))
            SASA[traj_ind, 0:project.traj_lengths[traj_ind]] = traj_asa

    else:
        traj_asa = []
        for traj_chunk in Trajectory.enum_chunks_from_lhdf(traj_fn, AtomIndices=atom_indices):
            traj_asa.extend(asa.calculate_asa(traj_chunk))

        SASA = np.array(traj_asa)

    return SASA
Developer ID: chrismichel, Project: msmbuilder, Lines of code: 29, Source file: CalculateProjectSASA.py
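
A hypothetical invocation of Example 1's run (not part of the original source): it assumes the msmbuilder 2.x Project.load_from classmethod and a project file named 'ProjectInfo.yaml', both placeholders to check against your own setup.

# Hypothetical usage sketch for Example 1.
from msmbuilder import Project

project = Project.load_from('ProjectInfo.yaml')  # placeholder file name
sasa = run(project, atom_indices=None, traj_fn='all')
# shape (n_trajs, max_traj_length, n_atoms); frames beyond the end of a
# shorter trajectory keep the -1 padding from np.ones(...) * -1
print sasa.shape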

Example 2: run

# Required import: from msmbuilder import Trajectory [as alias]
# Or: from msmbuilder.Trajectory import enum_chunks_from_lhdf [as alias]
def run(project, output, num_procs=1, chunk_size=50000, traj_fn='all'):

    pool = mp.Pool(num_procs)

    dssp_assignments = []

    if traj_fn.lower() == 'all':

        for i in xrange(project.n_trajs):
            traj_dssp_assignments = []
            N = project.traj_lengths[i]
            j = 0
            for trj_chunk in Trajectory.enum_chunks_from_lhdf(project.traj_filename(i), ChunkSize=chunk_size):
                result = pool.map_async(analyze_conf, trj_chunk['XYZList'])
                result.wait()

                traj_dssp_assignments.extend(result.get())

                j += len(trj_chunk)
                print "Trajectory %d: %d / %d" % (i, j, N)
            dssp_assignments.append(traj_dssp_assignments)

    else:
        traj_dssp_assignments = []
        N = Trajectory.load_from_lhdf(traj_fn, JustInspect=True)[0]
        j = 0
        for trj_chunk in Trajectory.enum_chunks_from_lhdf(traj_fn, ChunkSize=chunk_size):
            result = pool.map_async(analyze_conf, trj_chunk['XYZList'])
            result.wait()

            traj_dssp_assignments.extend(result.get())

            j += len(trj_chunk)
            print "Trajectory %s: %d / %d" % (traj_fn, j, N)
        dssp_assignments.append(traj_dssp_assignments)

    dssp_assignments = np.array(dssp_assignments)
    np.save(output, dssp_assignments)
    DEVNULL.close()
Developer ID: schwancr, Project: schwancr_bin, Lines of code: 41, Source file: CalculateProjectSS.py
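
A hypothetical invocation of Example 2's run. It assumes Project.load_from is available, and that analyze_conf and DEVNULL are defined at module level in the original script, as the excerpt implies.

# Hypothetical usage sketch for Example 2.
from msmbuilder import Project

project = Project.load_from('ProjectInfo.yaml')  # placeholder file name
# analyzes every trajectory with 4 worker processes and saves the
# per-frame assignments to 'dssp_assignments.npy'
run(project, 'dssp_assignments', num_procs=4, chunk_size=50000)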

Example 3: run

# Required import: from msmbuilder import Trajectory [as alias]
# Or: from msmbuilder.Trajectory import enum_chunks_from_lhdf [as alias]
def run(prep_metric, project, delta_time, atom_indices=None, 
        output='tICAData.h5', min_length=0, stride=1):

    # We will load the trajectories at the stride, so we need to find
    # what dt should be once we've strided by some amount
    lag = delta_time / stride
    
    if (float(delta_time) / stride) != lag:
        raise Exception("Stride must be a divisor of delta_time.")

    if lag > 0: # Then we're doing tICA
        tica_obj = tICA(lag=lag, calc_cov_mat=True, prep_metric=prep_metric)
    else: # If lag is zero, this is equivalent to regular PCA
        tica_obj = tICA(lag=lag, calc_cov_mat=False, prep_metric=prep_metric)
    
    for i in xrange(project.n_trajs):
        logger.info("Working on trajectory %d" % i)

        if project.traj_lengths[i] <= lag:
            logger.info("Trajectory is not long enough for this lag "
                        "(%d vs %d)", project.traj_lengths[i], lag)
            continue

        if project.traj_lengths[i] < min_length:
            logger.info("Trajectory is not longer than min_length "
                        "(%d vs %d)", project.traj_lengths[i], min_length)
            continue

        for traj_chunk in Trajectory.enum_chunks_from_lhdf(project.traj_filename(i),
                Stride=stride, AtomIndices=atom_indices):

            tica_obj.train(trajectory=traj_chunk)

    tica_obj.solve()
    tica_obj.save(output)
    logger.info("Saved output to %s", output)

    return tica_obj
Developer ID: raviramanathan, Project: msmbuilder, Lines of code: 40, Source file: tICA_train.py
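
A hypothetical invocation of Example 3's run. The metric construction is an assumption: any msmbuilder metric whose prepare_trajectory() returns a 2D feature array per frame should work as prep_metric; Dihedral is used here as a plausible choice, with its constructor arguments left at their (assumed) defaults.

# Hypothetical usage sketch for Example 3.
from msmbuilder import Project
from msmbuilder.metrics import Dihedral

project = Project.load_from('ProjectInfo.yaml')  # placeholder file name
metric = Dihedral()  # assumed default constructor
# delta_time=10 with stride=2 gives an effective lag of 5 strided frames;
# stride must divide delta_time or run() raises an Exception
tica_obj = run(metric, project, delta_time=10, output='tICAData.h5', stride=2)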

Example 4: load_prep_trajectories

# Required import: from msmbuilder import Trajectory [as alias]
# Or: from msmbuilder.Trajectory import enum_chunks_from_lhdf [as alias]
def load_prep_trajectories(project, stride, atom_indices, metric):
    """load the trajectories but prepare them during the load.
    This is helpful for metrics that use dimensionality reduction
    so you can use more frames without a MemoryError
    """
    list_of_ptrajs = []
    which = []
    for i in xrange(project.n_trajs):

        which_frames = np.arange(0, project.traj_lengths[i], stride)
    
        which.extend(zip([i] * len(which_frames), which_frames))

        ptraj = []
        for trj_chunk in Trajectory.enum_chunks_from_lhdf(project.traj_filename(i),
                            Stride=stride, AtomIndices=atom_indices):

            ptrj_chunk = metric.prepare_trajectory(trj_chunk)
            ptraj.append(ptrj_chunk)
    
        ptraj = np.concatenate(ptraj)
        list_of_ptrajs.append(ptraj)

    return list_of_ptrajs, np.array(which)
Developer ID: raviramanathan, Project: msmbuilder, Lines of code: 26, Source file: Cluster.py
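
A hypothetical invocation of Example 4's load_prep_trajectories (the metric choice is the same assumption as in the Example 3 sketch). The returned 'which' array maps each row of the concatenated prepared trajectories back to a (trajectory index, frame index) pair.

# Hypothetical usage sketch for Example 4.
from msmbuilder import Project
from msmbuilder.metrics import Dihedral

project = Project.load_from('ProjectInfo.yaml')  # placeholder file name
metric = Dihedral()  # assumed default constructor
ptrajs, which = load_prep_trajectories(project, stride=10,
                                       atom_indices=None, metric=metric)
print len(ptrajs), which.shape  # one prepared array per trajectory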

Example 5: PdfPages

# Required import: from msmbuilder import Trajectory [as alias]
# Or: from msmbuilder.Trajectory import enum_chunks_from_lhdf [as alias]
else:
    AvgCMs_1d = None

if Ass.max() < 250:
    pp = PdfPages(args.out_plot)
else:
    pp = None

chunk_size = 10000

if AvgCMs_1d is None:

    for traj_ind in xrange(Ass.shape[0]):
        logger.info("Working on %s" % Proj.traj_filename(traj_ind))
        for chunk_ind, trj_chunk in enumerate(
            Trajectory.enum_chunks_from_lhdf(Proj.traj_filename(traj_ind), ChunkSize=chunk_size)
        ):
            logger.debug("chunked")
            ptrj_chunk = get_hb(trj_chunk).astype(float)
            ass_chunk = Ass[traj_ind][
                chunk_ind * chunk_size : (chunk_ind + 1) * chunk_size
            ]  # this behaves as you want at the end of the array

            for i, ass in enumerate(ass_chunk):
                if ass == -1:
                    continue
                CMs_1d[ass] += ptrj_chunk[i]

    # StateAssigns = np.array([ np.where( Ass == i )[0].shape[0] for i in np.unique( Ass[ np.where( Ass >= 0 ) ] )] )
    StateAssigns = np.bincount(Ass[np.where(Ass != -1)], minlength=Ass.max() + 1)
    StateAssigns = StateAssigns.reshape((len(StateAssigns), 1))
Developer ID: schwancr, Project: schwancr_bin, Lines of code: 33, Source file: getAvgHB2.py
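
The inline comment about the slice behaving "as you want at the end of the array" relies on a slicing rule worth spelling out: a slice whose end point runs past the last index simply truncates, so the final assignment slice automatically matches the final, shorter trajectory chunk. A standalone illustration:

import numpy as np

a = np.arange(25)
chunk_size = 10
# the third chunk asks for indices 20:30, but only 5 elements remain
print a[2 * chunk_size : 3 * chunk_size]  # [20 21 22 23 24], length 5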

Example 6: assign_with_checkpoint

# Required import: from msmbuilder import Trajectory [as alias]
# Or: from msmbuilder.Trajectory import enum_chunks_from_lhdf [as alias]
def assign_with_checkpoint(metric, project, generators, assignments_path, distances_path, chunk_size=10000):
    """
    Assign every frame to its closest generator
    
    Parameters
    ----------
    metric : msmbuilder.metrics.AbstractDistanceMetric
        A distance metric used to define "closest"
    project : msmbuilder.Project
        Used to load the trajectories
    generators : msmbuilder.Trajectory
        A trajectory containing the structures of all of the cluster centers
    assignments_path : str
        Path to a file that contains/will contain the assignments, as a 2D array
        of integers in hdf5 format
    distances_path : str
        Path to a file that contains/will contain the distance from each
        frame to its assigned generator, as a 2D array of floats in hdf5 format
    chunk_size : int
        The number of frames to load and process per step. The optimal number
        here depends on your system memory -- it should probably be roughly
        the number of frames you can fit in memory at any one time. Note, this
        is only important if your trajectories are long, as the effective chunk_size
        is really `min(traj_length, chunk_size)`
        
    Notes
    -----
    The results will be checkpointed along the way, trajectory by trajectory. So if
    the process is killed, it should be able to roughly pick up where it left off.
    """

    pgens = metric.prepare_trajectory(generators)
    
    # setup the file handles
    fh_a, fh_d = _setup_containers(project, assignments_path, distances_path)

    for i in xrange(project.n_trajs):
        if fh_a.root.completed_trajs[i] and fh_d.root.completed_trajs[i]:
            logger.info('Skipping trajectory %s -- already assigned', i)
            continue
        if fh_a.root.completed_trajs[i] or fh_d.root.completed_trajs[i]:
            raise RuntimeError('Corruption detected')
        logger.info('Assigning trajectory %s', i)
        
        # pointer to the position in the total trajectory where
        # the current chunk starts, so we know where in the Assignments
        # array to put each batch of data
        start_index = 0
        
        for tchunk in Trajectory.enum_chunks_from_lhdf(project.traj_filename(i), ChunkSize=chunk_size):
            ptchunk = metric.prepare_trajectory(tchunk)
            this_length = len(ptchunk)
            
            distances = np.empty(this_length, dtype=np.float32)
            assignments = np.empty(this_length, dtype=np.int)

            for j in xrange(this_length):
                d = metric.one_to_all(ptchunk, pgens, j)
                ind = np.argmin(d)
                assignments[j] = ind
                distances[j] = d[ind]
            
            end_index = start_index+this_length
            fh_a.root.arr_0[i, start_index:end_index] = assignments
            fh_d.root.arr_0[i, start_index:end_index] = distances
            
            # i'm not sure exactly what the optimal flush frequency is
            fh_a.flush()
            fh_d.flush()
            start_index = end_index
                
        # we're going to keep duplicates of this record -- i.e. writing
        # it to both files

        # completed chunks are not checkpointed -- only completed trajectories
        # this means that if the process dies after completing 10/20 of the
        # chunks in trajectory i -- those chunks are going to have to be recomputed
        # (but trajectory i-1 is saved)

        # this could be changed, but the implementation is a little tricky -- you
        # have to watch out for the fact that the person might call this function
        # with chunk_size=N, let it run for a while, kill it, and then call it
        # again with chunk_size != N. Dealing with that appropriately is tricky
        # since the chunks won't line up in the two cases
        fh_a.root.completed_trajs[i] = True
        fh_d.root.completed_trajs[i] = True

    
    fh_a.close()        
    fh_d.close()
Developer ID: jimsnyderjr, Project: msmbuilder, Lines of code: 92, Source file: assigning.py
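
A hypothetical invocation of Example 6's assign_with_checkpoint. The generator file name is a placeholder; loading it with Trajectory.load_from_lhdf mirrors the call used in Example 2, and RMSD with no arguments is an assumed default constructor.

# Hypothetical usage sketch for Example 6.
from msmbuilder import Project, Trajectory
from msmbuilder.metrics import RMSD

project = Project.load_from('ProjectInfo.yaml')  # placeholder file name
gens = Trajectory.load_from_lhdf('Gens.lh5')     # placeholder file name
metric = RMSD()  # assumed default constructor
assign_with_checkpoint(metric, project, gens,
                       'Assignments.h5', 'Assignments.h5.distances')
# if the process is killed, rerunning the same call skips the
# trajectories already marked complete in both output files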

Example 7: PdfPages

# Required import: from msmbuilder import Trajectory [as alias]
# Or: from msmbuilder.Trajectory import enum_chunks_from_lhdf [as alias]
    CMs = None

if Ass.max() < 250:
    pp = PdfPages(args.out_plot)
else:
    pp = None

chunk_size = 10000

if CMs is None:

    for traj_ind in xrange(Ass.shape[0]):
        logger.info("Working on %s" % Proj.traj_filename(traj_ind))
        for chunk_ind, trj_chunk in enumerate(
            Trajectory.enum_chunks_from_lhdf(
                Proj.traj_filename(traj_ind), ChunkSize=chunk_size, AtomIndices=atom_indices
            )
        ):
            logger.debug("chunked")
            ptrj_chunk = HB.prepare_trajectory(trj_chunk).astype(float)
            ass_chunk = Ass[traj_ind][
                chunk_ind * chunk_size : (chunk_ind + 1) * chunk_size
            ]  # this behaves as you want at the end of the array

            for i, ass in enumerate(ass_chunk):
                if ass == -1:
                    continue
                CMs_1d[ass] += ptrj_chunk[i]

    # StateAssigns = np.array([ np.where( Ass == i )[0].shape[0] for i in np.unique( Ass[ np.where( Ass >= 0 ) ] )] )
    StateAssigns = np.bincount(Ass[np.where(Ass != -1)], minlength=Ass.max() + 1)
Developer ID: schwancr, Project: schwancr_bin, Lines of code: 33, Source file: getAvgHB.py

Example 8: assign_with_checkpoint

# Required import: from msmbuilder import Trajectory [as alias]
# Or: from msmbuilder.Trajectory import enum_chunks_from_lhdf [as alias]
def assign_with_checkpoint(metric, project, generators, assignments_path,
                           distances_path, chunk_size=10000, atom_indices_to_load=None):
    """
    Assign every frame to its closest generator

    The results will be checkpointed along the way, trajectory by trajectory.
    If the process is killed, it should be able to roughly pick up where it
    left off.

    Parameters
    ----------
    metric : msmbuilder.metrics.AbstractDistanceMetric
        A distance metric used to define "closest"
    project : msmbuilder.Project
        Used to load the trajectories
    generators : msmbuilder.Trajectory
        A trajectory containing the structures of all of the cluster centers
    assignments_path : str
        Path to a file that contains/will contain the assignments, as a 2D array
        of integers in hdf5 format
    distances_path : str
        Path to a file that contains/will contain the distance from each
        frame to its assigned generator, as a 2D array of floats in hdf5 format
    chunk_size : int
        The number of frames to load and process per step. The optimal number
        here depends on your system memory -- it should probably be roughly
        the number of frames you can fit in memory at any one time. Note, this
        is only important if your trajectories are long, as the effective chunk_size
        is really `min(traj_length, chunk_size)`
    atom_indices_to_load : {None, list}
        The indices of the atoms to load for each trajectory chunk. Note that
        this method is responsible for loading up atoms from the project, but
        does NOT load up the generators. Those are passed in as a trajectory
        object (above). So if the generators are already subsampled to a restricted
        set of atom indices, but the trajectories on disk are NOT, you'll
        need to pass in a set of indices here to resolve the difference.

    See Also
    --------
    assign_in_memory
    """

    pgens = metric.prepare_trajectory(generators)

    # setup the file handles
    fh_a, fh_d = _setup_containers(project, assignments_path, distances_path)

    for i in xrange(project.n_trajs):
        if fh_a.root.completed_trajs[i] and fh_d.root.completed_trajs[i]:
            logger.info('Skipping trajectory %s -- already assigned', i)
            continue
        if fh_a.root.completed_trajs[i] or fh_d.root.completed_trajs[i]:
            logger.warn("Re-assigning trajectory even though some data is"
                        " available...")
            fh_a.root.completed_trajs[i] = False
            fh_d.root.completed_trajs[i] = False
        logger.info('Assigning trajectory %s', i)

        # pointer to the position in the total trajectory where
        # the current chunk starts, so we know where in the Assignments
        # array to put each batch of data
        start_index = 0

        filename = project.traj_filename(i)
        chunkiter = Trajectory.enum_chunks_from_lhdf(filename,
                ChunkSize=chunk_size, AtomIndices=atom_indices_to_load)
        for tchunk in chunkiter:
            if tchunk['XYZList'].shape[1] != generators['XYZList'].shape[1]:
                msg = ("Number of atoms in generators does not match "
                       "traj we're trying to assign! Maybe check atom indices?")
                raise ValueError(msg)

            ptchunk = metric.prepare_trajectory(tchunk)

            this_length = len(ptchunk)

            distances = np.empty(this_length, dtype=np.float32)
            assignments = np.empty(this_length, dtype=np.int)

            for j in xrange(this_length):
                d = metric.one_to_all(ptchunk, pgens, j)
                ind = np.argmin(d)
                assignments[j] = ind
                distances[j] = d[ind]

            end_index = start_index+this_length
            fh_a.root.arr_0[i, start_index:end_index] = assignments
            fh_d.root.arr_0[i, start_index:end_index] = distances

            # i'm not sure exactly what the optimal flush frequency is
            fh_a.flush()
            fh_d.flush()
            start_index = end_index

        # we're going to keep duplicates of this record -- i.e. writing
        # it to both files

        # completed chunks are not checkpointed -- only completed trajectories
        # this means that if the process dies after completing 10/20 of the
        # chunks in trajectory i -- those chunks are going to have to be recomputed
#......... (remainder of the code omitted) .........
Developer ID: raviramanathan, Project: msmbuilder, Lines of code: 103, Source file: assigning.py
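
A hypothetical invocation of Example 8, for the situation the atom_indices_to_load docstring describes: the generators were saved with a restricted atom set, but the trajectories on disk were not. All file names are placeholders.

# Hypothetical usage sketch for Example 8.
import numpy as np
from msmbuilder import Project, Trajectory
from msmbuilder.metrics import RMSD

project = Project.load_from('ProjectInfo.yaml')          # placeholder
atom_indices = np.loadtxt('AtomIndices.dat', dtype=int)  # placeholder
gens = Trajectory.load_from_lhdf('Gens.lh5')  # already restricted to atom_indices
metric = RMSD()  # assumed default constructor
assign_with_checkpoint(metric, project, gens, 'Assignments.h5',
                       'Assignments.h5.distances',
                       atom_indices_to_load=atom_indices)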


Note: The msmbuilder.Trajectory.enum_chunks_from_lhdf method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their respective authors, and copyright remains with the original authors; for distribution and use, refer to each project's License. Do not reproduce without permission.