import csv
import os
import re
from datetime import datetime, timedelta

#*****************************************************************************************************
#
# Splitting recorded txt files based on event marker ID comments.
#
# Note: This file can be modified to split txt files on additional features such as the trial start
#       times or end times of each insect.
#
#*****************************************************************************************************


def define_dicts(datapath):
    """Build the (first_flight_dict, current_flight_dict) lookup tables from the datasheet."""
    #*****************************************************************************************************
    #
    # First, define dictionaries that will help map an insect's ID, set number, and channel letter
    # to its unique point of entry in a file.
    #
    # Input:    File path of data manually recorded during flight trials with the minimum following
    #           columns:
    #
    #               ID: the insect identification number (must match with the ID in the event marker)
    #               died?: marked with a Y if an insect died before testing
    #               chamber: written as "A-n" or "B-n" where n is the channel number from 1-4
    #               set_number: the recording set number formatted with leading zeros (e.g. 001, 002)
    #
    # Process:  Assumes that no insect was tested twice in the same recording set. Uses DictReader
    #           to remap dictionary rows into two data-based dictionaries. Rows marked died? == 'Y'
    #           are skipped.
    #
    # Output:   Two dictionaries, returned as a tuple (first_flight_dict, current_flight_dict).
    #
    #           The first_flight_dict is used for the first 4 insects that enter the chamber
    #           (e.g. either chamber "A" or "B"), which have no event marker to designate them.
    #           Its keys are (set_number, channel_letter, channel_num) and ID (an int) is its
    #           value. Only the first insect listed for each key is kept.
    #
    #           The current_flight_dict holds every insect, including those that get swapped in
    #           and out of the chambers. Its keys are (set_number, channel_letter, ID) and the
    #           channel_num (a string) is its value. By later extracting the ID number from the
    #           event marker comment, the specific channel number can be accessed at the insect's
    #           unique point of entry, which is its unique row in the file.
    #
    #*****************************************************************************************************

    first_flight_dict = {}
    current_flight_dict = {}

    # newline="" is what the csv module expects for files it parses itself.
    with open(datapath, "r", newline="") as data_file:
        for row in csv.DictReader(data_file):
            if row['died?'] == 'Y':
                continue

            ID = int(row['ID'])
            set_num = row['set_number']
            chamber_parts = row['chamber'].split("-")
            channel_letter = chamber_parts[0]
            channel_num = chamber_parts[-1]

            # Only the first insect seen in a channel is the "first flight" occupant.
            first_flight_dict.setdefault((set_num, channel_letter, channel_num), ID)

            entry_key = (set_num, channel_letter, ID)
            if entry_key not in current_flight_dict:
                current_flight_dict[entry_key] = channel_num
            else:
                # Same insect listed twice in one set/chamber: keep the first entry, warn the user.
                print('PROBLEM, BUG %s SHOWS UP AGAIN'%ID)

    return (first_flight_dict, current_flight_dict)


def map_IDs(path, outpath, first_flights=None, current_flights=None):
    """Rewrite each recording in `path` with four extra columns naming the insect in each channel."""
    #*****************************************************************************************************
    #
    # Map event marker IDs for an insect's channel number and its unique point of entry, or unique
    # row in a file.
    #
    # Input:    Txt file inpath and outpath (both are string prefixes that the filename is appended
    #           to, so they should end with a path separator). Input txt file has columns of the
    #           time from the beginning of the file (TBF), the voltage readings, date, time, and
    #           event marker information. Txt file also has at least the following in its filename:
    #           set_number and channel_letter. In this script, the code extracts information from
    #           this filename format: "T1_set006-2-24-2020-B.txt" where each sequence is the
    #           following,
    #
    #               T1: trial type number
    #               set006: recording set number formatted with leading zeros to keep the files
    #                       read in order
    #               2-24-2020: the date the recording occurred
    #               B: channel letter
    #
    #           * The set_num and channel_letter extraction inside the file loop below will need
    #             to be recoded for different filenames.
    #
    #           first_flights / current_flights are the two dictionaries returned by define_dicts.
    #           When omitted, the module-level names first_flight_dict and current_flight_dict are
    #           used instead.
    #
    # Process:  New dictionary rows are encoded that retain TBF, the voltages, the datetime, and the
    #           event number, but now include 4 new keys: channel1_bug, channel2_bug, channel3_bug,
    #           channel4_bug. The values in each new key are the IDs of the bug. To do this, an
    #           inplace dictionary called current_bugs is made. The ID in the event marker comment
    #           is extracted and used as one of the keys, along with keys set_number and
    #           channel_letter extracted from the filename, to access that bug's channel number.
    #           The 'new' bug then replaces the 'old' bug in the current bugs dict at its designated
    #           channel by reassigning ID values.
    #
    #           Rows that carry a non-zero event number but an empty event marker are dropped.
    #
    # Output:   A newly formatted file (no header row) with 4 new columns of IDs that map out, based
    #           on the event marker, when bugs come in and out and to which channel.
    #
    # Raises:   KeyError if a set/chamber/channel or set/chamber/ID combination is missing from the
    #           dictionaries; ValueError if an event marker contains no ID number.
    #
    #*****************************************************************************************************

    if first_flights is None:
        first_flights = first_flight_dict        # module-level fallback set by the script section
    if current_flights is None:
        current_flights = current_flight_dict    # module-level fallback set by the script section

    header = ["TBF", "1", "2", "3", "4", "event_happened", "event_num",
              "buffer", "date", "time", "event_marker"]

    # Fixed output layout. Deriving it from the last row read breaks whenever that row was skipped
    # or the file had no data rows at all.
    out_columns = ["TBF",
                   "channel1_voltage", "channel2_voltage", "channel3_voltage", "channel4_voltage",
                   "event_num", "datetime",
                   "channel1_bug", "channel2_bug", "channel3_bug", "channel4_bug"]

    id_pattern = re.compile(r'\d+')

    for file in sorted(os.listdir(path)):
        if file.startswith("."):
            continue

        filepath = path + str(file)
        full_data = []
        before_first_event = True

        #***********************************************************************************************
        # Set the extraction of the set_number and channel_letter according to the filename on the
        # two assignment lines directly below this comment:
        #
        # Use the split() function to split a string, in this case the filename, on any symbol.
        # For example, if the filename is instead as simple as '6-B.txt' where 6 is the
        # set number and B is the channel letter then the user can consider and write the following:
        #
        # file.split("-") splits the filename into a list of ['6', 'B.txt']
        # file.split("-")[0] extracts only '6', the value at the 0th index of list ['6', 'B.txt']
        # file.split("-")[1] extracts 'B.txt', the value at the 1st index of list ['6', 'B.txt']
        # file.split("-")[1].split(".") splits the string into a list of ['B', 'txt']
        # file.split("-")[1].split(".")[0] extracts only 'B', the value at the 0th index of
        # list ['B', 'txt']
        #
        # This is not the only way to extract the set number or channel letter, but it is an
        # example template.
        #
        #***********************************************************************************************

        set_num = file.split('-')[0][6:]
        channel_letter = file.split('.')[0][-1]

        print(file + "--------------------------------")

        with open(filepath, 'r', encoding='latin', newline='') as input_file:
            reader = csv.DictReader(input_file, delimiter=',', fieldnames=header)
            for row in reader:
                if row['TBF'].startswith("Samples per sec."):
                    continue

                event_num = int(float(row['event_num']))
                if event_num == 1 and int(float(row['event_happened'])) == 3:
                    # false event marker
                    row['event_num'] = '0'
                    row['event_happened'] = '0'
                    event_num = 0

                new_row = {
                    'TBF': row['TBF'],
                    'channel1_voltage': row['1'],
                    'channel2_voltage': row['2'],
                    'channel3_voltage': row['3'],
                    'channel4_voltage': row['4'],
                    'event_num': row['event_num'],
                    'datetime': datetime.strptime(row['date'] + ' ' + row['time'],
                                                  '%m-%d-%y %H:%M:%S'),
                }

                if before_first_event:
                    # The first data row seeds the chamber with the four unmarked starting insects.
                    current_bugs = {
                        'channel%s' % n: first_flights[(set_num, channel_letter, n)]
                        for n in ('1', '2', '3', '4')
                    }
                    before_first_event = False
                elif event_num != 0:
                    if not row["event_marker"]:
                        continue

                    id_match = id_pattern.search(row['event_marker'])
                    if id_match is None:
                        raise ValueError("No insect ID found in event marker %r of file %s"
                                         % (row['event_marker'], file))
                    new_bug = int(id_match.group())
                    new_channel = current_flights[(set_num, channel_letter, new_bug)]
                    event_number = event_num - 1 # removes false first event marker count
                    new_row['event_num'] = event_number
                    print(' Event Marker %s:'%(event_number),
                          ' new bug %s replacing old bug %s at channel %s'
                          %(new_bug, current_bugs['channel%s'%new_channel], new_channel))
                    current_bugs['channel%s'%new_channel] = new_bug

                new_row['channel1_bug'] = current_bugs['channel1']
                new_row['channel2_bug'] = current_bugs['channel2']
                new_row['channel3_bug'] = current_bugs['channel3']
                new_row['channel4_bug'] = current_bugs['channel4']

                full_data.append(new_row)

        # newline="" stops the csv module's own "\r\n" from being doubled on Windows.
        with open(outpath + file, "w", newline="") as output_file:
            writer = csv.DictWriter(output_file, delimiter=',', fieldnames=out_columns)
            writer.writerows(full_data)


def split_files(path, outpath):
    """Split each mapped file in `path` into one txt file per (channel, insect ID)."""
    #*****************************************************************************************************
    #
    # Split files according to ID.
    #
    # Input:    The newly formatted txt file with 4 new columns of intermittent ID numbers. Both path
    #           and outpath are string prefixes that the filename is appended to.
    #
    # Process:  New dictionary rows are encoded that retain the TBF, voltages specific to the channel
    #           number the insect flew in, and the datetime. These dictionary rows become the data
    #           values of the ID_data dict where the keys are the IDs. Looping through the items in
    #           this embedded dictionary, each key value or insect ID gets written into its own txt
    #           file with the rows specific to its flight trial. The input file is read once per
    #           channel so that only one channel's rows are held in memory at a time.
    #
    #           * The output filename is built from everything before the first "." of the input
    #             filename; this will need to be recoded for different filenames.
    #
    # Output:   New txt files. The filename contains additional information now: the insect ID number
    #           located at the end of the filename as well as its chamber name (e.g. "B1", "A3",
    #           etc.). The file itself contains the TBF and voltage readings rows specific to when
    #           the bug entered and left the flight mill chamber.
    #
    #*****************************************************************************************************

    col_names = ["TBF", "1", "2", "3", "4", "event_num", "datetime",
                 "chn1_ID", "chn2_ID", "chn3_ID", "chn4_ID"]
    out_columns = ["TBF", "voltage", "datetime"]

    for f in sorted(os.listdir(path)):
        if f.startswith("."):
            continue

        filepath = path + str(f)
        file_stem = f.split(".")[0]

        print("File splitting: " + f + "--------------------------------")

        for channel in range(1, 5):
            voltage_col = str(channel)
            id_col = 'chn' + str(channel) + '_ID'

            # Rows grouped by insect ID, in order of first appearance in the file.
            ID_data = {}
            with open(filepath, "r", newline="") as input_file:
                for row in csv.DictReader(input_file, fieldnames=col_names):
                    ID_data.setdefault(row[id_col], []).append({
                        'TBF': row['TBF'],
                        'voltage': row[voltage_col],
                        'datetime': row['datetime'],
                    })

            for key_ID, data in ID_data.items():
                print(" Making file for ID, " + str(key_ID))
                out_name = outpath + file_stem + str(channel) + '_' + str(key_ID) + ".txt"
                with open(out_name, "w", newline="") as output_file:
                    writer = csv.DictWriter(output_file, fieldnames=out_columns)
                    writer.writerows(data)


if __name__ == "__main__":

    #*****************************************************************************************************
    # Write file main path down below. An example path is r"/Users/username/Desktop/Flight_scripts/".
    #*****************************************************************************************************

    main_path = ""  # input the path to the Flight_scripts directory here (keep the trailing slash)
    if not main_path:
        raise SystemExit("Set main_path to the Flight_scripts directory before running this script.")

    # Defining dictionaries:
    datafile = main_path + "data/datasheet.csv"
    first_flight_dict, current_flight_dict = define_dicts(datafile)

    # Mapping event marker IDs:
    txt_inpath = main_path + "recordings/"
    txt_outpath = main_path + "files2split/"
    os.makedirs(txt_outpath, exist_ok=True)
    map_IDs(txt_inpath, txt_outpath, first_flight_dict, current_flight_dict)

    # Splitting files by ID:
    split_inpath = main_path + "files2split/"
    split_outpath = main_path + "split_files/"
    os.makedirs(split_outpath, exist_ok=True)
    split_files(split_inpath, split_outpath)