"""
Assignment: write a program that takes as input a plain text file containing
natural language, and produces a list of each normalized type (unique token)
and the number of times it appears in the file, sorted from largest to
smallest. The user should be able to type:

    $ python normalize_text.py myfile.txt --lower --stem

and the results should be printed to the screen, e.g.:

    the 5000
    dog 1204
    ran 506
    ...

and so on. Options that the user can control must include: lowercasing, one
of either stemming or lemmatization, stopword removal, and at least one
additional option you added.
"""
import argparse
import re
from string import punctuation

# Minimal English stopword list (stdlib-only; kept small on purpose —
# extend as needed or swap in NLTK's list if that dependency is allowed).
STOPWORDS = {
    'a', 'an', 'and', 'are', 'as', 'at', 'be', 'but', 'by', 'for', 'if',
    'in', 'into', 'is', 'it', 'no', 'not', 'of', 'on', 'or', 'such',
    'that', 'the', 'their', 'then', 'there', 'these', 'they', 'this',
    'to', 'was', 'were', 'will', 'with',
}


def normalize_text(text, lower=True, remove_punctuation=False):
    """Normalize raw text and split it into word tokens.

    Args:
        text: The raw input string.
        lower: If True, lowercase the text before tokenizing.
        remove_punctuation: If True, strip ASCII punctuation first.

    Returns:
        A list of word tokens (strings) in document order.
    """
    if lower:
        text = text.lower()
    if remove_punctuation:
        # One C-level pass removes every punctuation character.
        text = text.translate(str.maketrans('', '', punctuation))
    # \b\w+\b grabs runs of word characters; punctuation between words
    # still acts as a separator even when not stripped above.
    return re.findall(r'\b\w+\b', text)


def simple_stem(token):
    """Crudely stem a single token by stripping one common suffix.

    Suffixes are tried longest-first so that e.g. 'boxes' -> 'box'
    rather than 'boxe'. The original order ('s' before 'es') made the
    'es' rule unreachable. A length guard keeps us from emptying or
    over-truncating very short tokens.

    Args:
        token: A single word token.

    Returns:
        The token with one suffix removed, or unchanged if none apply.
    """
    for suffix in ('ing', 'ed', 'es', 's'):
        if token.endswith(suffix) and len(token) > len(suffix):
            return token[:-len(suffix)]
    return token


def count_tokens(tokens):
    """Count occurrences of each token.

    Args:
        tokens: An iterable of token strings.

    Returns:
        A dict mapping token -> occurrence count.
    """
    counts = {}
    for token in tokens:
        counts[token] = counts.get(token, 0) + 1
    return counts


def main():
    """Parse CLI options, normalize the input file, and print counts.

    Prints one `token count` line per type, sorted by descending count.
    """
    parser = argparse.ArgumentParser(
        description='Text Normalization and Frequency Analysis')
    parser.add_argument('filename', type=str, help='Input text file')
    parser.add_argument('--lower', action='store_true',
                        help='Lowercase the text')
    parser.add_argument('--stem', action='store_true',
                        help='Apply stemming to tokens')
    parser.add_argument('--remove_stopwords', action='store_true',
                        help='Remove common stopwords')
    parser.add_argument('--remove_punctuation', action='store_true',
                        help='Remove punctuation from text')
    args = parser.parse_args()

    with open(args.filename, 'r', encoding='utf-8') as file:
        content = file.read()

    # Bug fixes vs. the original: pass `content` (not the undefined
    # `text`), and apply stopword removal / stemming here instead of
    # passing unsupported kwargs to normalize_text().
    tokens = normalize_text(content, lower=args.lower,
                            remove_punctuation=args.remove_punctuation)
    if args.remove_stopwords:
        tokens = [t for t in tokens if t not in STOPWORDS]
    if args.stem:
        tokens = [simple_stem(t) for t in tokens]

    # Count the normalized tokens (the original mistakenly counted the
    # characters of the raw file contents) and sort by frequency.
    counts = count_tokens(tokens)
    for word, count in sorted(counts.items(),
                              key=lambda x: x[1], reverse=True):
        print(f'{word} {count}')


if __name__ == '__main__':
    main()