I am trying to follow this code to extract keyphrases from a pandas column (link to the Jupyter notebook).
Whenever I run it, I always get an error on the line with the regex operation.
```python
corpus = []
dataset['word_count'] = dataset[datacol].apply(lambda x: len(str(x).split(" ")))
ds_count = len(dataset.word_count)
for i in range(0, ds_count):
    # Remove punctuation
    text = re.sub('[^a-zA-Z]', '', str(dataset[datacol][i]))  # the error is here
```

This is the error I am getting. Does anyone know what could have caused this?
```
---> 58 text = re.sub('[^a-zA-Z]', '', pr_df['Comment'][i])

File C:\Program Files\Anaconda3\Lib\site-packages\pandas\core\series.py:981, in Series.__getitem__(self, key)
    978     return self._values[key]
    980 elif key_is_scalar:
--> 981     return self._get_value(key)
    983 if is_hashable(key):
    984     # Otherwise index.get_value will raise InvalidIndexError
    985     try:
    986         # For labels that don't resolve as scalars like tuples and frozensets

File C:\Program Files\Anaconda3\Lib\site-packages\pandas\core\series.py:1089, in Series._get_value(self, label, takeable)
   1086     return self._values[label]
   1088 # Similar to Index.get_value, but we do not fall back to positional
-> 1089 loc = self.index.get_loc(label)
   1090 return self.index._get_values_for_loc(self, loc, label)

File C:\Program Files\Anaconda3\Lib\site-packages\pandas\core\indexes\base.py:3804, in Index.get_loc(self, key, method, tolerance)
   3802     return self._engine.get_loc(casted_key)
   3803 except KeyError as err:
-> 3804     raise KeyError(key) from err
   3805 except TypeError:
   3806     # If we have a listlike key, _check_indexing_error will raise
   3807     # InvalidIndexError. Otherwise we fall through and re-raise
   3808     # the TypeError.
   3809     self._check_indexing_error(key)

KeyError: 0
```