tokenize_and_concatenate()
Tokenizes a HuggingFace text dataset, concatenates all the tokens, and chops them into fixed-length rows, returning a dataset with a single "tokens" column.
from datasets import load_dataset
from transformers import AutoTokenizer
from transformer_lens.utils import tokenize_and_concatenate

# Any HuggingFace tokenizer works; in practice this is usually model.tokenizer
# from a HookedTransformer.
tokenizer = AutoTokenizer.from_pretrained("gpt2")

pile = load_dataset("NeelNanda/pile-10k", split="train")
token_dataset = tokenize_and_concatenate(
    dataset=pile,
    tokenizer=tokenizer,
    streaming=True,       # set True for streaming datasets (skips multiprocessing)
    max_length=1024,      # length of each packed row of tokens
    add_bos_token=True,   # prepend BOS to every row
)
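The result can be indexed directly for batches of token ids. A minimal sketch of that, assuming the output keeps the torch-formatted "tokens" column the way the TransformerLens demos use it:

# Each row is a fixed-length sequence of max_length token ids.
tokens = token_dataset["tokens"]          # torch tensor, shape [n_rows, 1024]
print(tokens.shape)
print(tokenizer.decode(tokens[0][:20]))   # peek at the start of the first row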
get_act_name()
get_act_name("resid_pre", layer=8)  # -> "blocks.8.hook_resid_pre"
Converts a short activation-site name (like "resid_pre") plus a layer into the full hook-point name used to index model activations. The name and layer_type arguments also accept the aliases below:
# Aliases accepted for the layer_type argument:
layer_type_alias = {
    "a": "attn",
    "m": "mlp",
    "b": "",
    "block": "",
    "blocks": "",
    "attention": "attn",
}

# Aliases accepted for the name argument (mapped to the canonical hook name):
act_name_alias = {
    "attn": "pattern",
    "attn_logits": "attn_scores",
    "key": "k",
    "query": "q",
    "value": "v",
    "mlp_pre": "pre",
    "mlp_mid": "mid",
    "mlp_post": "post",
}
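A few illustrative calls, plus how the resulting names are typically used to index an activation cache. This is a sketch assuming a HookedTransformer named model, as in the TransformerLens demos; the outputs follow the alias tables above:

from transformer_lens import HookedTransformer
from transformer_lens.utils import get_act_name

get_act_name("resid_pre", 8)   # "blocks.8.hook_resid_pre"
get_act_name("q", 3)           # "blocks.3.attn.hook_q"
get_act_name("query", 3)       # same, via act_name_alias
get_act_name("mlp_post", 5)    # "blocks.5.mlp.hook_post"
get_act_name("embed")          # "hook_embed" (no layer needed)

# The full names index into an activation cache:
model = HookedTransformer.from_pretrained("gpt2")
logits, cache = model.run_with_cache("Hello world")
pattern = cache[get_act_name("attn", 0)]  # "blocks.0.attn.hook_pattern"

ActivationCache also accepts the short forms directly, e.g. cache["pattern", 0], which calls get_act_name under the hood.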