How to solve: RuntimeError: CUDA error: device-side assert triggered?


I want to use the paraphrase-multilingual-mpnet-base-v2 model to build embeddings, but I get this error:

RuntimeError: CUDA error: device-side assert triggered

The error occurs when executing string = {k: v.to(device=device) for k, v in string.items()}.

Why do I get the error?

I am working in Google Colab with 12.7 GB of RAM and 16 GB of GPU RAM.

The goal of the code is to generate sentence embeddings. With some customization, chunk-wise execution is also possible.
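For reference, the expected end result is ordinary sentence embeddings. A minimal sketch of the same model through the sentence-transformers wrapper (not what my code uses, since I need chunk-wise control) would be:

# Minimal sketch; assumes the sentence-transformers package is installed.
from sentence_transformers import SentenceTransformer

st_model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-mpnet-base-v2')
embeddings = st_model.encode(["Ein Beispielsatz.", "Noch ein Satz."])
print(embeddings.shape)  # (2, 768)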

The complete error message:

RuntimeError                              Traceback (most recent call last)
<ipython-input-17-8e6bf00d9e24> in <cell line: 104>()
    102     return np.nan
    103
--> 104 processed_data = processDataRAG(df[5000:], tokenizer, model)

4 frames

<ipython-input-17-8e6bf00d9e24> in processDataRAG(data, tokenizer, model)
     10   sents = [str(sentences[0]) for sentences in article_sentences]
     11   number_of_article =[sentences[1] for sentences in article_sentences]
---> 12   embedded_sentencs = [embeddChunkwise(sentence, tokenizer, model, 512) for sentence in tqdm(sents, desc = "Create chunk-wise embeddings")]
     13   return pd.DataFrame({
     14       "sentences": sents,

<ipython-input-17-8e6bf00d9e24> in <listcomp>(.0)
     10   sents = [str(sentences[0]) for sentences in article_sentences]
     11   number_of_article =[sentences[1] for sentences in article_sentences]
---> 12   embedded_sentencs = [embeddChunkwise(sentence, tokenizer, model, 512) for sentence in tqdm(sents, desc = "Create chunk-wise embeddings")]
     13   return pd.DataFrame({
     14       "sentences": sents,

<ipython-input-17-8e6bf00d9e24> in embeddChunkwise(string, tokenizer, model, chunk_size)
     55     #encoded_input = tokenizer(tokenizer.detokenize(tokenized_chunk))
     56     if len(encoded_chunk) > 0:
---> 57       embedded_chunk = createEmbeddings(
     58           tokenizer(tokenizer.decode(encoded_chunk, skip_special_tokens  = True), return_tensors='pt', add_special_tokens=False),
     59           model

<ipython-input-17-8e6bf00d9e24> in createEmbeddings(string, model)
     77   #print("Length of input_ids: ", len(string["input_ids"][0]))
     78   if "input_ids" in string.keys():
---> 79     string = {k: v.to(device=device) for k, v in string.items()}
     80     with torch.no_grad():
     81

<ipython-input-17-8e6bf00d9e24> in <dictcomp>(.0)
     77   #print("Length of input_ids: ", len(string["input_ids"][0]))
     78   if "input_ids" in string.keys():
---> 79     string = {k: v.to(device=device) for k, v in string.items()}
     80     with torch.no_grad():
     81

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
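Following the hint in the error message, the assert can be made synchronous so the traceback points at the kernel that actually fails. A sketch (the environment variable must be set before torch initializes CUDA, so in Colab the runtime has to be restarted first):

import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # must run before any CUDA call

# Alternative: reload the model on the CPU; the same failure then usually
# raises a readable Python error (e.g. IndexError) instead of the assert.
# device = "cpu"
# model = AutoModel.from_pretrained(
#     'sentence-transformers/paraphrase-multilingual-mpnet-base-v2')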

I run this code:

import numpy as np
import pandas as pd
import torch
from torch import cuda
from tqdm.auto import tqdm
from transformers import AutoTokenizer, AutoModel

tqdm.pandas()  # enables .progress_apply on pandas objects
# nlp_de (a sentence splitter, e.g. a spaCy pipeline) and file_path are defined earlier in the notebook.

def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0]  # First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

# Select device globally
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# Load model from HuggingFace Hub
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/paraphrase-multilingual-mpnet-base-v2')
model = AutoModel.from_pretrained('sentence-transformers/paraphrase-multilingual-mpnet-base-v2',
                                  device_map=device)

df = pd.read_json(file_path)

def processDataRAG(data, tokenizer, model):
    article_sentences = data.content.progress_apply(lambda x: list(nlp_de(x).sents))
    #tokenized_articles = data.content.progress_apply(lambda article: tokenizeChunkwise(article, tokenizer, 512))
    article_sentences = [
        (sentences, idx)
        for idx, article in tqdm(enumerate(list(article_sentences)), desc="Loop over articles with index")
        for sentences in article
    ]
    sents = [str(sentences[0]) for sentences in article_sentences]
    number_of_article = [sentences[1] for sentences in article_sentences]
    embedded_sentencs = [embeddChunkwise(sentence, tokenizer, model, 512)
                         for sentence in tqdm(sents, desc="Create chunk-wise embeddings")]
    return pd.DataFrame({
        "sentences": sents,
        "embeddings": embedded_sentencs,
        "article": number_of_article
    })

def embeddChunkwise(string, tokenizer, model, chunk_size):
    decreasing_by_special_tokens = 0  # Because of special tokens at the beginning and end
    encoded_string = tokenizer(string, add_special_tokens=False)
    if len(encoded_string["input_ids"]) / chunk_size > 1:
        print("Tokenized_string:", encoded_string)
        print("Total tokens: ", str(len(encoded_string["input_ids"])))
        print("Tokenized string in chunks: ", str(len(encoded_string["input_ids"]) / chunk_size),
              " --- ", str(len(encoded_string["input_ids"]) // chunk_size + 1))
    embedded_chunks = []
    for idx in range(len(encoded_string["input_ids"]) // chunk_size + 1):
        encoded_chunk = None
        if (chunk_size - decreasing_by_special_tokens) * (idx + 1) < len(encoded_string["input_ids"]):
            # Long input: take a full chunk
            start_idx = chunk_size * idx - decreasing_by_special_tokens * idx
            end_idx = chunk_size * (idx + 1) - decreasing_by_special_tokens * (idx + 1)
            encoded_chunk = encoded_string["input_ids"][start_idx:end_idx]
        else:
            # Short input, or the final (partial) chunk
            if chunk_size - decreasing_by_special_tokens > len(encoded_string["input_ids"]):
                encoded_chunk = encoded_string["input_ids"][chunk_size * idx - decreasing_by_special_tokens * idx:]
            else:
                encoded_chunk = encoded_string["input_ids"][-(chunk_size * idx - decreasing_by_special_tokens * idx):]
        if len(encoded_chunk) > 0:
            embedded_chunk = createEmbeddings(
                tokenizer(tokenizer.decode(encoded_chunk, skip_special_tokens=True),
                          return_tensors='pt', add_special_tokens=False),
                model
            )
            if isinstance(embedded_chunk, list):
                embedded_chunks.append(embedded_chunk[0])
    if len(embedded_chunks) > 1:
        return embedded_chunks
    elif len(embedded_chunks) == 0:
        return np.nan
    else:
        return embedded_chunks[0]

def createEmbeddings(string, model):
    if "input_ids" in string.keys():
        string = {k: v.to(device=device) for k, v in string.items()}
        with torch.no_grad():
            try:
                model_output = model(**string)
            except Exception as ex:
                print("--- Error by creating Embeddings ---")
                print("Error: ", str(ex))
                return np.nan
        # Perform pooling. In this case, average pooling
        try:
            sentence_embeddings = mean_pooling(model_output, string['attention_mask'])
        except Exception as ex:
            print("--- Error by pooling embeddings ---")
            print("Model output: ", str(model_output))
            print("Attention_mask: ", str(string['attention_mask']))
            print("Error: ", str(ex))
            return np.nan
        sentence_embeddings = sentence_embeddings.detach().cpu().numpy()
        return sentence_embeddings
    else:
        return np.nan
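One sanity check that might narrow this down, since out-of-range indices into an embedding table (token id beyond the vocabulary, or a sequence longer than the model's position embeddings) are a common trigger for this assert. A sketch, where chunk_text is a hypothetical placeholder for one decoded chunk and the out-of-range hypothesis is an assumption, not a confirmed diagnosis:

# Check whether a re-tokenized chunk could overflow an embedding table.
enc = tokenizer(chunk_text, return_tensors='pt', add_special_tokens=False)
print("sequence length:", enc["input_ids"].shape[1],
      "| max positions:", model.config.max_position_embeddings)
print("max token id:", enc["input_ids"].max().item(),
      "| vocab size:", model.config.vocab_size)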
