Commit 5b421499d1f3efdff616175d22f177ac1de63ed5

Authored by Brice COLOMBIER
0 parents
Exists in master

Initial commit

Showing 2 changed files with 143 additions and 0 deletions

.gitignore View file @ 5b42149
@@ -0,0 +1,5 @@
+*.npy
+*.bin
+*.txt
+*.py~
+.gitignore~
\ No newline at end of file
preprocessing.py View file @ 5b42149
@@ -0,0 +1,138 @@
+# Imports for processing
+import numpy as np
+import scipy.special
+from skimage.util.shape import view_as_windows
+import itertools
+
+# Imports for parallel processing
+from multiprocessing import Pool
+from multiprocessing import current_process
+import math
+import copy
+
+# Imports for script utilities
+import logging as log
+import argparse
+
+# Imports for dev
+import cProfile
+import time
+
+# Wrapper unpacking the argument tuple, so that Pool.map can call pairwise_operation
+def multi_run_wrapper(args):
+    return pairwise_operation(*args)
+
+def pairwise_operation(traces,
+                       window_size,
+                       minimum_distance,
+                       operation,
+                       dtype,
+                       verbose=False,
+                       first_chunk=True):
+
+    """Operates in a sliding window over the whole trace.
+    Computes operation(x_i, x_j) for every pair of samples (x_i, x_j)
+    in the window, with minimum_distance <= distance(x_i, x_j) < window_size.
+
+    Arguments:
+    traces: 2-D array of traces, one trace per row
+    window_size: size of the window in which the pairwise operation is done
+    minimum_distance: minimum distance between two samples processed
+    operation: processing operation to apply to each pair of samples
+    dtype: NumPy dtype of the processed traces
+    verbose: display INFO messages
+    first_chunk: indicates the first chunk of data for parallel processing
+    """
+
+    if verbose:
+        log.basicConfig(format="%(levelname)s: %(message)s", level=log.DEBUG)
+        log.info("Verbose output enabled")
+    else:
+        log.basicConfig(format="%(levelname)s: %(message)s")
+
+    nb_traces, nb_samples = np.shape(traces)
+    log.info("Input: {0} traces of {1} samples".format(nb_traces, nb_samples))
+
+    # Report file size increase
+    processed_trace_length = len(list(itertools.combinations(range(window_size), 2))) + (nb_samples-window_size)*(window_size-1)
+    if first_chunk:
+        print processed_trace_length
+    # Allocate memory for the processed trace
+    processed_trace = np.zeros((processed_trace_length, nb_traces), dtype=dtype)
+    current_index = 0
+    # For all possible start indices in the window
+    for current_distance in range(minimum_distance, window_size):
+        for current_start_index in range(nb_samples - current_distance):
+            if first_chunk or current_start_index+current_distance >= window_size-1:
+                value = np.array(operation(traces[:,current_start_index], traces[:,current_start_index+current_distance]), ndmin=2)
+                # Store the resulting vector
+                processed_trace[current_index,:] = np.transpose(value)[:,0]
+                # Increase the running index
+                current_index += 1
+    # print "Number of columns to remove", len(processed_trace[np.all(processed_trace == 0, axis=1)])
+    processed_trace = processed_trace[~np.all(processed_trace == 0, axis=1)]
+    return np.transpose(processed_trace)
+
+def multiplication(*args):
+    return args[0] * args[1]
+def addition(*args):
+    return args[0] + args[1]
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(description='Preprocess traces')
+    parser.add_argument("traces_name", type=str)
+    parser.add_argument("--op", type=str, choices=['addition', 'multiplication'], required=True)
+    parser.add_argument("--window_size", type=int)
+    parser.add_argument("--min_dist", type=int)
+    parser.add_argument("--dtype", type=str)
+    parser.add_argument("--ncores", type=int)
+    parser.add_argument('-v', '--verbose', action='store_true')
+    args = parser.parse_args()
+    if args.op == 'multiplication':
+        operation = multiplication
+    elif args.op == 'addition':
+        operation = addition
+    dtype = np.dtype(args.dtype).type
+
+    data_set_width = 1000
+    data_set_height = 1
+    test_array = np.array([range(i, i+data_set_width) for i in range(data_set_height)])
+    # print test_array
+    traces = test_array
+    # traces = np.load(args.traces_name)
+    shortened = 0
+    while int(np.shape(traces)[1] + (args.ncores - 1)*(args.window_size - 1))%args.ncores != 0:
+        traces = traces[:,:-1].copy()
+        shortened += 1
+    if shortened:
+        log.warning("Traces shortened by {0} samples to split them into equally-sized chunks".format(shortened))
+    # print traces
+    t0 = time.time()
+    processed_traces = pairwise_operation(traces, args.window_size, args.min_dist, operation, dtype, args.verbose)
+    for i in processed_traces:
+        i.sort()
+    print "###\nNORMAL"
+    print np.shape(processed_traces)
+    print round(time.time()-t0, 2), "s"
+    t0 = time.time()
+    print "###"
+
+    pool = Pool(args.ncores)
+    chunk_size = int(np.shape(traces)[1]+(args.ncores-1)*(args.window_size-1))/args.ncores
+    print "Traces split into chunks of {0} samples".format(chunk_size)
+    traces = view_as_windows(traces, (np.shape(traces)[0],chunk_size), step=chunk_size-args.window_size+1)[0]
+    # print traces
+    arguments = [(trace_set, args.window_size, args.min_dist, operation, dtype, args.verbose, first_chunk) for (trace_set, first_chunk) in zip(traces, [True]+(args.ncores-1)*[False])]
+    processed_traces_parallel = np.concatenate(pool.map(multi_run_wrapper, arguments), axis=1)
+    for i in processed_traces_parallel:
+        i.sort()
+    print "###\nPARALLEL"
+    print np.shape(processed_traces_parallel)
+    print round(time.time()-t0, 2), "s"
+    t0 = time.time()
+    print "###"
+    if np.all(processed_traces==processed_traces_parallel):
+        print "SURPRISINGLY, IT ACTUALLY WORKED !!!"
+    # pairwise_operation(traces, args.window_size, args.min_dist, operation, dtype, args.verbose)
+    # np.save("preprocessed_masked_traces.npy", pairwise_operation("masked_traces.npy", 3, 1, multiplication, np.float64, verbose=True))
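
For reference, a minimal usage sketch of pairwise_operation, not part of the commit: it assumes the traces are already loaded as a NumPy array and uses made-up sizes. With min_dist = 1 and a single chunk, the number of pairwise values per trace equals the length the function allocates, C(window_size, 2) + (nb_samples - window_size) * (window_size - 1).

    import numpy as np
    # from preprocessing import pairwise_operation, multiplication

    # 2 traces of 10 non-zero samples, so no result row is discarded by the zero-row filter
    traces = np.arange(1, 21, dtype=np.float64).reshape(2, 10)
    processed = pairwise_operation(traces, 4, 1, multiplication, np.float64)
    # C(4, 2) + (10 - 4) * (4 - 1) = 6 + 18 = 24 pairwise products per trace
    print(np.shape(processed))  # (2, 24)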
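
The splitting logic in __main__ pads the trace length by (ncores - 1) * (window_size - 1) so that the chunks produced by view_as_windows overlap by window_size - 1 samples; the first_chunk flag then appears intended to skip, in every chunk but the first, the pairs already covered by the previous chunk. A small numeric sketch of that arithmetic, with hypothetical values mirroring the expressions above:

    nb_samples, ncores, window_size = 1000, 4, 5
    # drop trailing samples until the padded length splits evenly, as the while loop does
    while (nb_samples + (ncores - 1) * (window_size - 1)) % ncores != 0:
        nb_samples -= 1
    chunk_size = (nb_samples + (ncores - 1) * (window_size - 1)) // ncores
    step = chunk_size - window_size + 1
    # chunk_size = 253, step = 249: chunks start at 0, 249, 498 and 747, overlapping by 4 samples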