common:
  # The number of historical images
  img_history_size: 2
  # The number of future actions to predict
  action_chunk_size: 64
  # The number of cameras to be used in the model
  num_cameras: 3
  # Dimension of the state/action vector; we use the same unified space for both states and actions
  # This MUST match the dimension defined in configs/state_vec.py
  state_dim: 128

dataset:
  # The producer extracts data from the raw dataset
  # and stores it in a disk buffer;
  # during training, the consumer reads samples
  # randomly from the buffer.
  # The producer replaces data that has already been
  # read by the consumer with new data.
  # The path to the buffer (requires at least 400GB of disk space)
  buf_path: /home/jellyho/RDTBuffer
  # The number of chunks in the buffer
  buf_num_chunks: 128
  # The number of samples (steps rather than episodes) in each chunk
  buf_chunk_size: 128
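  # Note: total buffer capacity is buf_num_chunks * buf_chunk_size samples
  # (with the values above: 128 * 128 = 16384)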
  # Episodes with fewer than `epsd_len_thresh_low` steps are filtered out
  epsd_len_thresh_low: 32
  # For episodes longer than `epsd_len_thresh_high` steps,
  # we randomly sample `epsd_len_thresh_high` steps each time the episode is loaded,
  # to better balance the training datasets
  epsd_len_thresh_high: 2048
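  # (e.g., a 4000-step episode would contribute 2048 randomly sampled steps per load)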
  # How to fit images to the expected input size
  image_aspect_ratio: pad
  # Maximum number of language tokens
  tokenizer_max_length: 1024
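  # (instructions longer than this are assumed to be truncated by the tokenizer)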

model:
  # Config for the condition adaptors
  lang_adaptor: mlp2x_gelu
  img_adaptor: mlp2x_gelu
  state_adaptor: mlp3x_gelu
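  # (`mlpNx_gelu` is assumed to denote an N-layer MLP with GELU activations, LLaVA-style naming)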
  lang_token_dim: 4096
  img_token_dim: 1152
  # Dim of action or proprioception vector
  # A `state` refers to an action or a proprioception vector
  state_token_dim: 128
  # Config for the RDT structure
  rdt:
    # 1B variant: num_heads 32, hidden_size 2048
    hidden_size: 2048
    depth: 28
    num_heads: 32
    cond_pos_embed_type: multimodal
  # For the noise scheduler
  noise_scheduler:
    type: ddpm
    num_train_timesteps: 1000
    num_inference_timesteps: 5
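    # (the model is trained with 1000 diffusion timesteps; sampling at inference
    # is assumed to use only 5 denoising steps)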
    beta_schedule: squaredcos_cap_v2  # Critical choice
    prediction_type: sample
    clip_sample: False
  # For EMA (parameter averaging)
  # We do not use EMA currently
  ema:
    update_after_step: 0
    inv_gamma: 1.0
    power: 0.75
    min_value: 0.0
    max_value: 0.9999
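    # In diffusers-style EMA implementations, the decay at a given step is typically
    # 1 - (1 + step / inv_gamma) ** (-power), clipped to [min_value, max_value];
    # update_after_step delays the start of EMA updates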