Command-R

Runtime error

App Files Files Community

Command-R / app.py

minhdang

Update app.py

7364237 verified almost 2 years ago

raw

history blame contribute delete

3.6 kB

	import torch
	# Replace torch.jit.script with an identity function before the remaining
	# imports run, so any later caller gets the plain Python callable back.
	# NOTE(review): presumably a workaround for TorchScript failures in a
	# dependency -- confirm before removing.
	torch.jit.script = lambda f: f
	import spaces
	import gradio as gr
	import transformers
	from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig,AwqConfig
	import torch
	import os
	import bitnet
	# Hugging Face access token, read from the "key" environment variable
	# (None when the variable is not set).
	key = os.environ.get("key")
	from huggingface_hub import login
	# Authenticate only when a token is configured: login(None) falls back to an
	# interactive token prompt, which cannot be answered in a headless deployment.
	if key:
	    login(key)
	from bitnet import replace_linears_in_hf
	# os.system("mkdir c4ai-command-r-v01-exl2")
	# os.system("huggingface-cli download bartowski/c4ai-command-r-v01-exl2 --revision 6_5 --local-dir c4ai-command-r-v01-exl2 --local-dir-use-symlinks False")
	# os.system("pip install flash-attn --no-build-isolation")
	# Checkpoint served by this app.
	model_id = "IEITYuan/Yuan2-M32-hf"

	# 4-bit NF4 weights with double quantization; compute dtype is bfloat16.
	_quantization_options = {
	    "load_in_4bit": True,
	    "bnb_4bit_quant_type": "nf4",
	    "bnb_4bit_use_double_quant": True,
	    "bnb_4bit_compute_dtype": torch.bfloat16,
	}
	nf4_config = BitsAndBytesConfig(**_quantization_options)

	tokenizer = AutoTokenizer.from_pretrained(model_id)

	# Alternatives previously tried and left disabled: load_in_8bit=True,
	# attn_implementation="flash_attention_2", torch_dtype=torch.bfloat16, and
	# replace_linears_in_hf(model) after loading.
	_model_load_options = {
	    "quantization_config": nf4_config,
	    "device_map": "auto",
	    # Allows modeling code shipped in the model repository to be executed.
	    "trust_remote_code": True,
	}
	model = AutoModelForCausalLM.from_pretrained(model_id, **_model_load_options)

	# Inference only: switch the module to evaluation mode.
	model.eval()
	@spaces.GPU
	def generate_response(user_input, max_new_tokens, temperature):
	    """Generate a sampled reply to a single user message.

	    Args:
	        user_input: Text of the user's message.
	        max_new_tokens: Upper bound on the number of tokens to generate.
	        temperature: Sampling temperature forwarded to ``model.generate``.

	    Returns:
	        The decoded text of the newly generated tokens only, with leading
	        whitespace removed.
	    """
	    # Diagnostic: print GPU state to the process output before and after the
	    # prompt is tokenized and moved to the model's device.
	    os.system("nvidia-smi")
	    messages = [{"role": "user", "content": user_input}]
	    input_ids = tokenizer.apply_chat_template(
	        messages,
	        tokenize=True,
	        add_generation_prompt=True,
	        return_tensors="pt",
	    )
	    input_ids = input_ids.to(model.device)
	    os.system("nvidia-smi")

	    gen_tokens = model.generate(
	        input_ids=input_ids,
	        # UI sliders may deliver a float; the token budget must be an integer.
	        max_new_tokens=int(max_new_tokens),
	        do_sample=True,
	        temperature=temperature,
	    )

	    # The returned sequence starts with the prompt tokens. Drop them by length
	    # rather than comparing decoded text against user_input: the chat template
	    # wraps the message, so the decoded output rarely starts with the raw input.
	    prompt_length = input_ids.shape[-1]
	    reply_tokens = gen_tokens[0][prompt_length:]
	    return tokenizer.decode(reply_tokens, skip_special_tokens=True).lstrip()



	# Canned prompts offered in the example dropdown, each paired with the
	# generation settings to load alongside it.
	examples = [
	    {"message": "What is the weather like today?", "max_new_tokens": 250, "temperature": 0.5},
	    {"message": "Tell me a joke.", "max_new_tokens": 650, "temperature": 0.7},
	    {"message": "Explain the concept of machine learning.", "max_new_tokens": 980, "temperature": 0.4},
	]
	# Dropdown labels; "Example N" corresponds to examples[N - 1].
	example_choices = [f"Example {number}" for number in range(1, len(examples) + 1)]


	def load_example(choice):
	    """Return the message and generation settings for a dropdown choice.

	    Args:
	        choice: One of ``example_choices``. ``None`` or an unknown label
	            (for instance when the load button is pressed before anything
	            is selected) falls back to the first example instead of
	            raising ``ValueError``.

	    Returns:
	        Tuple ``(message, max_new_tokens, temperature)``.
	    """
	    try:
	        index = example_choices.index(choice)
	    except ValueError:
	        # Nothing (or something unexpected) was selected.
	        index = 0
	    example = examples[index]
	    return example["message"], example["max_new_tokens"], example["temperature"]


	# UI: two generation sliders in one row, then the message box, the generate
	# button and the output box, followed by the example loader.
	# NOTE(review): nesting was reconstructed from a flattened listing -- confirm
	# that the row is meant to contain only the two sliders.
	with gr.Blocks() as demo:
	    with gr.Row():
	        max_new_tokens_slider = gr.Slider(
	            label="Max New Tokens",
	            minimum=100,
	            maximum=4000,
	            value=980,
	        )
	        temperature_slider = gr.Slider(
	            label="Temperature",
	            minimum=0.1,
	            maximum=1.0,
	            step=0.1,
	            value=0.3,
	        )
	    message_box = gr.Textbox(lines=2, label="Your Message")
	    generate_button = gr.Button("Try🫡Command-R")
	    output_box = gr.Textbox(label="🫡Command-R")

	    # Generate: message + slider values in, reply text out.
	    generation_controls = [message_box, max_new_tokens_slider, temperature_slider]
	    generate_button.click(fn=generate_response, inputs=generation_controls, outputs=output_box)

	    # Example loader: fills the same three controls from the chosen example.
	    example_dropdown = gr.Dropdown(choices=example_choices, label="🫡Load Example")
	    example_button = gr.Button("🫡Load")
	    example_button.click(fn=load_example, inputs=example_dropdown, outputs=list(generation_controls))

	demo.launch()