mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 02:02:21 +06:00
[Falcon H1] Fix slow path forward pass (#38320)
* Create push-important-models.yml * feat: add falcon-h1 * fixup * address comment * fix * fix copies * fix copies * fix * fix * fix * fix * fix copies * fix * fix copies * fix test import to at least trigger the CIs * yups * update * fix make fix copies * fix inits? * fix style * skip annoying test * add integration test for Falcon H1 * fix copies * fix * fix typo * make style * fix slow path generations * clean debug traces * debug * remove debug traces final confirmation * clean debug traces final * fix format and lineup * make style * debug * Update src/transformers/models/falcon_h1/modular_falcon_h1.py Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com> * address comments * fix fix-copies * fix integration test * Merge pull request #7 from ydshieh/fix-slow-path update * another update (#8) * update * update --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com> --------- Co-authored-by: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Co-authored-by: Younes Belkada <younesbelkada@gmail.com> Co-authored-by: younesbelkada <younes.belkada@tii.ae> Co-authored-by: Arthur Zucker <arthur.zucker@gmail.com> Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com> Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
parent
b5b76b5561
commit
7a9b071bfd
@ -604,9 +604,10 @@ class FalconH1Mixer(nn.Module):
|
||||
):
|
||||
# 1. Gated MLP's linear projection
|
||||
hidden_states = apply_mask_to_padding_states(hidden_states, attention_mask)
|
||||
# Add Multipliers
|
||||
hidden_states = hidden_states * self.ssm_in_multiplier
|
||||
projected_states = self.in_proj(hidden_states)
|
||||
projected_states = projected_states * self.mup_vector
|
||||
projected_states = projected_states * self.mup_vector # ADD Mup Multipliers
|
||||
d_to_remove = 2 * self.intermediate_size + 2 * self.n_groups * self.ssm_state_size + self.num_heads
|
||||
|
||||
# Set up dimensions for reshapes later
|
||||
@ -800,10 +801,13 @@ class FalconH1Mixer(nn.Module):
|
||||
|
||||
# 1. Gated MLP's linear projection
|
||||
input_states = apply_mask_to_padding_states(input_states, attention_mask)
|
||||
# Add Multipliers
|
||||
input_states = input_states * self.ssm_in_multiplier
|
||||
projected_states = self.in_proj(input_states)
|
||||
gate, hidden_states_B_C, dt = projected_states.split(
|
||||
[self.intermediate_size, self.conv_dim, self.num_heads], dim=-1
|
||||
)
|
||||
projected_states = projected_states * self.mup_vector # ADD Mup Multipliers
|
||||
gate, hidden_states_B_C, dt = projected_states.split([
|
||||
self.intermediate_size, self.conv_dim, self.num_heads
|
||||
], dim=-1)
|
||||
|
||||
use_precomputed_states = (
|
||||
cache_params is not None
|
||||
@ -914,8 +918,8 @@ class FalconH1Mixer(nn.Module):
|
||||
hidden_states = hidden_states.reshape(batch_size, seq_len, -1, self.head_dim).float()
|
||||
B = B.reshape(batch_size, seq_len, -1, self.ssm_state_size).float()
|
||||
C = C.reshape(batch_size, seq_len, -1, self.ssm_state_size).float()
|
||||
B = B.repeat(1, 1, self.num_heads // self.n_groups, 1)
|
||||
C = C.repeat(1, 1, self.num_heads // self.n_groups, 1)
|
||||
B = B.repeat_interleave(self.num_heads // self.n_groups, dim=2, output_size=self.num_heads)
|
||||
C = C.repeat_interleave(self.num_heads // self.n_groups, dim=2, output_size=self.num_heads)
|
||||
pad_size = (self.chunk_size - seq_len % self.chunk_size) % self.chunk_size
|
||||
|
||||
D_residual = self.D[..., None] * pad_tensor_by_size(hidden_states, pad_size)
|
||||
|
@ -415,9 +415,10 @@ class FalconH1Mixer(nn.Module):
|
||||
):
|
||||
# 1. Gated MLP's linear projection
|
||||
hidden_states = apply_mask_to_padding_states(hidden_states, attention_mask)
|
||||
# Add Multipliers
|
||||
hidden_states = hidden_states * self.ssm_in_multiplier
|
||||
projected_states = self.in_proj(hidden_states)
|
||||
projected_states = projected_states * self.mup_vector
|
||||
projected_states = projected_states * self.mup_vector # ADD Mup Multipliers
|
||||
d_to_remove = 2 * self.intermediate_size + 2 * self.n_groups * self.ssm_state_size + self.num_heads
|
||||
|
||||
# Set up dimensions for reshapes later
|
||||
@ -611,10 +612,13 @@ class FalconH1Mixer(nn.Module):
|
||||
|
||||
# 1. Gated MLP's linear projection
|
||||
input_states = apply_mask_to_padding_states(input_states, attention_mask)
|
||||
# Add Multipliers
|
||||
input_states = input_states * self.ssm_in_multiplier
|
||||
projected_states = self.in_proj(input_states)
|
||||
gate, hidden_states_B_C, dt = projected_states.split(
|
||||
[self.intermediate_size, self.conv_dim, self.num_heads], dim=-1
|
||||
)
|
||||
projected_states = projected_states * self.mup_vector # ADD Mup Multipliers
|
||||
gate, hidden_states_B_C, dt = projected_states.split([
|
||||
self.intermediate_size, self.conv_dim, self.num_heads
|
||||
], dim=-1)
|
||||
|
||||
use_precomputed_states = (
|
||||
cache_params is not None
|
||||
@ -725,8 +729,8 @@ class FalconH1Mixer(nn.Module):
|
||||
hidden_states = hidden_states.reshape(batch_size, seq_len, -1, self.head_dim).float()
|
||||
B = B.reshape(batch_size, seq_len, -1, self.ssm_state_size).float()
|
||||
C = C.reshape(batch_size, seq_len, -1, self.ssm_state_size).float()
|
||||
B = B.repeat(1, 1, self.num_heads // self.n_groups, 1)
|
||||
C = C.repeat(1, 1, self.num_heads // self.n_groups, 1)
|
||||
B = B.repeat_interleave(self.num_heads // self.n_groups, dim=2, output_size=self.num_heads)
|
||||
C = C.repeat_interleave(self.num_heads // self.n_groups, dim=2, output_size=self.num_heads)
|
||||
pad_size = (self.chunk_size - seq_len % self.chunk_size) % self.chunk_size
|
||||
|
||||
D_residual = self.D[..., None] * pad_tensor_by_size(hidden_states, pad_size)
|
||||
|
@ -484,24 +484,27 @@ class FalconH1ModelIntegrationTest(unittest.TestCase):
|
||||
"""
|
||||
An integration test for Falcon-H1.
|
||||
"""
|
||||
EXPECTED_TEXT = (
|
||||
"Tell me about the french revolution.\n"
|
||||
"The French Revolution (1789–1799) was a period of radical social and political upheaval in France that "
|
||||
"fundamentally transformed the nation and had profound effects on the rest of Europe and the world. Here are the key aspects of the revolution:\n\n"
|
||||
"### **Causes**\n"
|
||||
"1. **Economic Crisis**: France was in severe financial trouble due to costly wars (particularly the American Revolution), extravagant spending by the monarchy, and inefficient taxation.\n"
|
||||
"2. **Social Inequality**: The rigid class system (the Ancien Régime) divided society into the privileged nobility and clergy (First Estate) and the common people (Third Estate), who bore the brunt of taxation and had few rights.\n"
|
||||
"3. **Enlightenment Ideas**: Philosophers like Rousseau, Voltaire, and Montesquieu inspired ideas of liberty, equality, and popular sovereignty.\n"
|
||||
"4. **Settlement of 1789**: The Estates-General convened to address the financial crisis, leading to the Third Estate's assertion of its rights and the eventual formation of the National Assembly.\n\n"
|
||||
"### **Key Events**\n"
|
||||
"1. **Opening of the Revolution (1789)**:\n"
|
||||
"- **Storming of the Bastille**: Symbolic of the fall of royal tyranny.\n"
|
||||
"- **Declaration of the Rights of Man and of the Citizen**: Proclaimed universal rights to liberty, property, and security.\n"
|
||||
"- **Creation of the National Assembly**: The Third Estate declared itself the representative body of France.\n\n"
|
||||
"2. **Radical Phase (1792–1794)**:\n"
|
||||
"- **Reign of Terror**: Led by Maximilien Robespierre, the Committee of Public Safety enforced radical egalitarianism through the guillotine, executing thousands of perceived enemies of the revolution (monarchists, clergy, aristocrats, and counter-revolutionaries).\n"
|
||||
"- **Execution of Louis XVI**: The king was guillotined in June 1793, symbolizing the end of the monarchy.\n"
|
||||
)
|
||||
EXPECTED_TEXT = """
|
||||
user
|
||||
Tell me about the french revolution.
|
||||
assistant
|
||||
The French Revolution (1789–1799) was a period of radical social and political upheaval in France that fundamentally transformed the nation and had profound effects on the rest of Europe and the world. Here are the key aspects of the revolution:
|
||||
|
||||
### **Causes**
|
||||
1. **Economic Crisis**: France was in severe financial trouble due to costly wars (particularly the American Revolution), extravagant spending by the monarchy, and inefficient taxation.
|
||||
2. **Social Inequality**: The rigid class system (the Ancien Régime) divided society into the privileged nobility and clergy (First Estate) and the commoners (Third Estate), who bore the brunt of taxation and had few rights.
|
||||
3. **Enlightenment Ideas**: Philosophers like Voltaire, Rousseau, and Montesquieu inspired ideas of liberty, equality, and popular sovereignty.
|
||||
4. **Settlement of 1789**: The Estates-General convened to address the financial crisis, leading to the Third Estate's assertion of its rights and the eventual abolition of the feudal system.
|
||||
|
||||
### **Key Events**
|
||||
1. **Storming of the Bastille (July 14, 1789)**: A symbol of royal tyranny, the Bastille fortress was stormed by revolutionaries, sparking widespread rebellion.
|
||||
2. **Declaration of the Rights of Man and of the Citizen (August 1789)**: A foundational document proclaiming liberty, equality, and fraternity.
|
||||
3. **National Assembly and King’s Trial (1791–1792)**: King Louis XVI and his ministers were tried and executed (King Louis was guillotined, Marie Antoinette was banished), marking the end of the monarchy.
|
||||
4. **Rise of the Jacobins and Reign of Terror (1793–1794)**: Radical leaders like Maximilien Robespierre sought to purge France of counter-revolutionaries, leading to mass executions and widespread fear.
|
||||
5. **Thermidorian Reaction
|
||||
"""
|
||||
# Remove the first char (`\n`) and the consecutive whitespaces caused by the formatting.
|
||||
EXPECTED_TEXT = EXPECTED_TEXT.strip().replace(" " * 12, "")
|
||||
|
||||
model_id = "tiiuae/Falcon-H1-1.5B-Deep-Instruct"
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
|
Loading…
Reference in New Issue
Block a user