Currently my code runs fine; it's just a bit too slow for my use. I am using OpenCV to capture the screen and find matches, then checking those results with a series of if statements. I believe the current bottleneck is in those if statements.
```python
pyautogui.PAUSE = 0.005

Location1 = range(90, 289)
Location2 = range(290, 400)  # 394, 403f
Location3 = range(450, 630)  # 551, 565
Location4 = range(640, 800)  # 730, 695
Location5 = range(801, 1000)
empty = []

# Change the working directory to the folder this script is in.
# Doing this because I'll be putting the files from each video in their own folder on GitHub
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# initialize the WindowCapture class
wincap = WindowCapture(None)
# initialize the Vision class
vision_limestone = Vision('Festivalnote.jpg')

keys = []
inputs = []
loop_time = time()
while 1:
    # get an updated image of the game
    screenshot = wincap.get_screenshot()
    #haystack_img = vision_limestone.apply_hsv_filter(screenshot)

    # display the processed image
    points = vision_limestone.find(screenshot, 0.5)  #, 'rectangles')
    #cv.imshow('Matches', haystack_img)
    #length = len(keys)

    if points == empty:
        points = [(0, 0, 0)]
    if len(points) < 3:
        points.append((0, 0))
        points.append((0, 0))
    first, sec, thd, *others = points
    #print(points)
    y_thresh = 175

    if first[0] in Location1 or sec[0] in Location1 or thd[0] in Location1:
        #print('d')
        pyautogui.keyDown("d")
        pyautogui.keyUp("d")
        #keys.append('d')
        #inputs.append('d')
    if first[0] in Location2 or sec[0] in Location2 or thd[0] in Location2:
        pyautogui.keyDown("f")
        pyautogui.keyUp("f")
        #print('f')
        #keys.append('f')
        #inputs.append("f")
    if first[0] in Location3 or sec[0] in Location3 or thd[0] in Location3:
        pyautogui.keyDown("j")
        pyautogui.keyUp("j")
        #print('j')
        #keys.append('j')
        #inputs.append('j')
    if first[0] in Location4 or sec[0] in Location4 or thd[0] in Location4:
        pyautogui.keyDown("k")
        pyautogui.keyUp("k")
        #print("k")
        #keys.append('k')
        #inputs.append('k')
    if first[0] in Location5 or sec[0] in Location5 or thd[0] in Location5:
        pyautogui.keyDown("l")
        pyautogui.keyUp("l")
        #print("l")
        #keys.append('l')
        #inputs.append('l')

    #if length == len(keys):
        #inputs.append(time() - loop_time)
        #print(inputs)

    # debug the loop rate
    print('FPS {}'.format(1 / (time() - loop_time)))
    loop_time = time()

    # press 'q' with the output window focused to exit.
    # waits 1 ms every loop to process key presses
    if cv2.waitKey(1) == ord('q'):
        cv2.destroyAllWindows()
        break
```
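To make the bottleneck question concrete, this is a rough, untested sketch of how each stage could be timed separately. It reuses the wincap and vision_limestone objects from the snippet above and uses time.perf_counter for a finer-grained clock than time(); nothing here is code I am actually running yet.

```python
from time import perf_counter

# Untested sketch: time screen capture, template matching, and the key-press
# block separately, to see which stage the loop actually spends its time in.
while True:
    t0 = perf_counter()
    screenshot = wincap.get_screenshot()
    t1 = perf_counter()
    points = vision_limestone.find(screenshot, 0.5)
    t2 = perf_counter()
    # ... the if / keyDown / keyUp block from the snippet above would go here ...
    t3 = perf_counter()
    print('capture {:.1f} ms, match {:.1f} ms, keys {:.1f} ms'.format(
        (t1 - t0) * 1000, (t2 - t1) * 1000, (t3 - t2) * 1000))
```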
I really have no idea whether this can even be sped up, as I am a newer coder. I looked at numpy and at rewriting the loop that way, but I didn't see a way to apply it here (correct me if I'm wrong). I also looked at multiprocessing, but my understanding is that the workers couldn't reach back to the one screenshot and would each have to take their own screenshot, which seems like it would end up slower (there is a rough sketch of what I mean after the full code below). Please let me know of any ways you think I could speed this up.
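For reference, the closest I could get to a numpy version of the five range checks is the untested sketch below. BOUNDS, KEYS and keys_for_points are names I made up just for illustration; the bands mirror Location1 through Location5 above, and each matching key would still need its own pyautogui call afterwards, so I'm not sure this would actually beat the plain if statements.

```python
import numpy as np

# Untested sketch: replace the five if-statements with one boolean-mask check.
# Each row of BOUNDS is a column band (left edge inclusive, right edge exclusive),
# matching Location1..Location5; KEYS are the keys those bands trigger.
BOUNDS = np.array([[90, 289], [290, 400], [450, 630], [640, 800], [801, 1000]])
KEYS = ['d', 'f', 'j', 'k', 'l']

def keys_for_points(points):
    """Return the keys whose column band contains the x value of any of the first three points."""
    if not points:
        return []
    xs = np.array([p[0] for p in points[:3]])      # only the first three points, like the loop above
    lo, hi = BOUNDS[:, 0:1], BOUNDS[:, 1:2]        # column vectors so the comparison broadcasts
    hits = (xs >= lo) & (xs < hi)                  # hits[i, j]: point j falls inside band i
    return [key for key, row in zip(KEYS, hits) if row.any()]

# which would turn the if-block into something like:
# for key in keys_for_points(points):
#     pyautogui.keyDown(key)
#     pyautogui.keyUp(key)
```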
Full code:

```python
def func():
    import cv2
    import numpy as np
    import os
    from time import time
    import pyautogui
    import win32gui, win32ui, win32con


    class WindowCapture:

        # properties
        w = 0
        h = 0
        hwnd = None
        cropped_x = 800
        cropped_y = 950
        offset_x = 0
        offset_y = 0

        # constructor
        def __init__(self, window_name=None):
            # find the handle for the window we want to capture.
            # if no window name is given, capture the entire screen
            if window_name is None:
                self.hwnd = win32gui.GetDesktopWindow()
            else:
                self.hwnd = win32gui.FindWindow(None, window_name)
                if not self.hwnd:
                    raise Exception('Window not found: {}'.format(window_name))

            # get the window size
            window_rect = win32gui.GetWindowRect(self.hwnd)
            self.w = 1000  #window_rect[2] - window_rect[0]
            self.h = 300   #window_rect[3] - window_rect[1]

            # account for the window border and titlebar and cut them off
            #border_pixels = 8
            #titlebar_pixels = 30
            #self.w = self.w - (border_pixels * 2)
            #self.h = self.h - titlebar_pixels - border_pixels
            #self.cropped_x = border_pixels
            #self.cropped_y = titlebar_pixels

            # set the cropped coordinates offset so we can translate screenshot
            # images into actual screen positions
            self.offset_x = window_rect[0] + self.cropped_x
            self.offset_y = window_rect[1] + self.cropped_y

        def get_screenshot(self):
            # get the window image data
            wDC = win32gui.GetWindowDC(self.hwnd)
            dcObj = win32ui.CreateDCFromHandle(wDC)
            cDC = dcObj.CreateCompatibleDC()
            dataBitMap = win32ui.CreateBitmap()
            dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
            cDC.SelectObject(dataBitMap)
            cDC.BitBlt((0, 0), (self.w, self.h), dcObj, (self.cropped_x, self.cropped_y), win32con.SRCCOPY)

            # convert the raw data into a format opencv can read
            #dataBitMap.SaveBitmapFile(cDC, 'debug.bmp')
            signedIntsArray = dataBitMap.GetBitmapBits(True)
            img = np.fromstring(signedIntsArray, dtype='uint8')
            img.shape = (self.h, self.w, 4)

            # free resources
            dcObj.DeleteDC()
            cDC.DeleteDC()
            win32gui.ReleaseDC(self.hwnd, wDC)
            win32gui.DeleteObject(dataBitMap.GetHandle())

            # drop the alpha channel, or cv.matchTemplate() will throw an error like:
            #   error: (-215:Assertion failed) (depth == CV_8U || depth == CV_32F) && type == _templ.type()
            #   && _img.dims() <= 2 in function 'cv::matchTemplate'
            img = img[..., :3]

            # make image C_CONTIGUOUS to avoid errors that look like:
            #   File ... in draw_rectangles
            #   TypeError: an integer is required (got type tuple)
            # see the discussion here:
            # https://github.com/opencv/opencv/issues/14866#issuecomment-580207109
            img = np.ascontiguousarray(img)

            return img

        # find the name of the window you're interested in.
        # once you have it, update window_capture()
        # https://stackoverflow.com/questions/55547940/how-to-get-a-list-of-the-name-of-every-open-window
        @staticmethod
        def list_window_names():
            def winEnumHandler(hwnd, ctx):
                if win32gui.IsWindowVisible(hwnd):
                    print(hex(hwnd), win32gui.GetWindowText(hwnd))
            win32gui.EnumWindows(winEnumHandler, None)

        # translate a pixel position on a screenshot image to a pixel position on the screen.
        # pos = (x, y)
        # WARNING: if you move the window being captured after execution is started, this will
        # return incorrect coordinates, because the window position is only calculated in
        # the __init__ constructor.
        def get_screen_position(self, pos):
            return (pos[0] + self.offset_x, pos[1] + self.offset_y)


    class Vision:

        # properties
        needle_img = None
        needle_w = 0
        needle_h = 0
        method = None

        # constructor
        def __init__(self, needle_img_path, method=cv2.TM_CCOEFF_NORMED):
            # load the image we're trying to match
            # https://docs.opencv.org/4.2.0/d4/da8/group__imgcodecs.html
            self.needle_img = cv2.imread(needle_img_path, cv2.IMREAD_UNCHANGED)

            # Save the dimensions of the needle image
            self.needle_w = self.needle_img.shape[1]
            self.needle_h = self.needle_img.shape[0]

            # There are 6 methods to choose from:
            # TM_CCOEFF, TM_CCOEFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_SQDIFF, TM_SQDIFF_NORMED
            self.method = method

        def find(self, haystack_img, threshold=0.5, debug_mode=None):
            # run the OpenCV algorithm
            result = cv2.matchTemplate(haystack_img, self.needle_img, self.method)

            # Get all the positions from the match result that exceed our threshold
            locations = np.where(result >= threshold)
            locations = list(zip(*locations[::-1]))
            #print(locations)

            # You'll notice a lot of overlapping rectangles get drawn. We can eliminate those redundant
            # locations by using groupRectangles().
            # First we need to create the list of [x, y, w, h] rectangles
            rectangles = []
            for loc in locations:
                rect = [int(loc[0]), int(loc[1]), self.needle_w, self.needle_h]
                # Add every box to the list twice in order to retain single (non-overlapping) boxes
                rectangles.append(rect)
                rectangles.append(rect)

            # Apply group rectangles.
            # The groupThreshold parameter should usually be 1. If you put it at 0 then no grouping is
            # done. If you put it at 2 then an object needs at least 3 overlapping rectangles to appear
            # in the result. I've set eps to 0.5, which is:
            # "Relative difference between sides of the rectangles to merge them into a group."
            rectangles, weights = cv2.groupRectangles(rectangles, groupThreshold=1, eps=0.5)
            #print(rectangles)

            points = []
            if len(rectangles):
                #print('Found needle.')

                #line_color = (0, 255, 0)
                #line_type = cv2.LINE_4
                #marker_color = (0, 255, 0)
                #marker_type = cv2.MARKER_CROSS

                # Loop over all the rectangles
                for (x, y, w, h) in rectangles:
                    # Determine the center position
                    center_x = x + int(w/2)
                    center_y = y + int(h/2)
                    # Save the points
                    points.append((center_x, center_y))

                    #if debug_mode == 'rectangles':
                        # Determine the box position
                        #top_left = (x, y)
                        #bottom_right = (x + w, y + h)
                        # Draw the box
                        #cv2.rectangle(haystack_img, top_left, bottom_right, color=line_color,
                        #              lineType=line_type, thickness=2)
                    #elif debug_mode == 'points':
                        # Draw the center point
                        #cv.drawMarker(haystack_img, (center_x, center_y),
                        #              color=marker_color, markerType=marker_type,
                        #              markerSize=40, thickness=2)

            if debug_mode:
                cv2.imshow('Matches', haystack_img)
                #cv.waitKey()
                #cv.imwrite('result_click_point.jpg', haystack_img)

            return points


    pyautogui.PAUSE = 0.005

    Location1 = range(90, 289)
    Location2 = range(290, 400)  # 394, 403f
    Location3 = range(450, 630)  # 551, 565
    Location4 = range(640, 800)  # 730, 695
    Location5 = range(801, 1000)
    empty = []

    # Change the working directory to the folder this script is in.
    # Doing this because I'll be putting the files from each video in their own folder on GitHub
    os.chdir(os.path.dirname(os.path.abspath(__file__)))

    # initialize the WindowCapture class
    wincap = WindowCapture(None)
    # initialize the Vision class
    vision_limestone = Vision('Festivalnote.jpg')

    keys = []
    inputs = []
    loop_time = time()
    while 1:
        # get an updated image of the game
        screenshot = wincap.get_screenshot()
        #haystack_img = vision_limestone.apply_hsv_filter(screenshot)

        # display the processed image
        points = vision_limestone.find(screenshot, 0.5)  #, 'rectangles')
        #cv.imshow('Matches', haystack_img)
        #length = len(keys)

        if points == empty:
            points = [(0, 0, 0)]
        if len(points) < 3:
            points.append((0, 0))
            points.append((0, 0))
        first, sec, thd, *others = points
        #print(points)
        y_thresh = 175

        if first[0] in Location1 or sec[0] in Location1 or thd[0] in Location1:
            #print('d')
            pyautogui.keyDown("d")
            pyautogui.keyUp("d")
            #keys.append('d')
            #inputs.append('d')
        if first[0] in Location2 or sec[0] in Location2 or thd[0] in Location2:
            pyautogui.keyDown("f")
            pyautogui.keyUp("f")
            #print('f')
            #keys.append('f')
            #inputs.append("f")
        if first[0] in Location3 or sec[0] in Location3 or thd[0] in Location3:
            pyautogui.keyDown("j")
            pyautogui.keyUp("j")
            #print('j')
            #keys.append('j')
            #inputs.append('j')
        if first[0] in Location4 or sec[0] in Location4 or thd[0] in Location4:
            pyautogui.keyDown("k")
            pyautogui.keyUp("k")
            #print("k")
            #keys.append('k')
            #inputs.append('k')
        if first[0] in Location5 or sec[0] in Location5 or thd[0] in Location5:
            pyautogui.keyDown("l")
            pyautogui.keyUp("l")
            #print("l")
            #keys.append('l')
            #inputs.append('l')

        #if length == len(keys):
            #inputs.append(time() - loop_time)
            #print(inputs)

        # debug the loop rate
        print('FPS {}'.format(1 / (time() - loop_time)))
        loop_time = time()

        # press 'q' with the output window focused to exit.
        # waits 1 ms every loop to process key presses
        if cv2.waitKey(1) == ord('q'):
            cv2.destroyAllWindows()
            break

    #with open("inputs.txt", "w") as output:
        #output.write(",".join(str(i) for i in inputs))

    print('Done.')


func()
```
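And this is roughly the multiprocessing layout I was picturing when I decided it probably wouldn't help: every worker ends up with its own WindowCapture and takes its own screenshot, so the expensive capture and matching work is duplicated five times instead of shared. watch_band is a made-up name, the sketch assumes the classes and Location ranges live at module level rather than inside func(), and I haven't tried running it, so I may well be wrong about this.

```python
from multiprocessing import Process

def watch_band(band, key):
    # each process captures and matches on its own, duplicating the expensive work
    cap = WindowCapture(None)
    vis = Vision('Festivalnote.jpg')
    while True:
        shot = cap.get_screenshot()
        pts = vis.find(shot, 0.5)
        if any(p[0] in band for p in pts[:3]):
            pyautogui.keyDown(key)
            pyautogui.keyUp(key)

if __name__ == '__main__':
    bands = [(Location1, 'd'), (Location2, 'f'), (Location3, 'j'),
             (Location4, 'k'), (Location5, 'l')]
    for band, key in bands:
        Process(target=watch_band, args=(band, key)).start()
```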