Commit 4f7d40276a60a51a9f7cc86f42293e7413bf26a8

Authored by Brice COLOMBIER
1 parent 6ce2d4c48e
Exists in master

Add indexes as return value

Showing 3 changed files with 74 additions and 54 deletions

.gitignore
... ... @@ -2,4 +2,5 @@
2 2 *.bin
3 3 *.txt
4 4 *.*~
  5 +*.org
README.md
... ... @@ -28,7 +28,7 @@
28 28 The script is typically used in the following manner:
29 29  
30 30 ```bash
31   -python preprocessing.py traces_masked.npy --op=multiplication --window_size=5 --min_dist=1 --dtype=float64 --ncores=4
  31 +python preprocessing.py masked_traces.npy --op=multiplication --window_size=5 --min_dist=1 --dtype=float64 --ncores=4
32 32 ```
33 33  
34 34 The parameter is the file in which the traces are stored in `numpy` format.
... ... @@ -36,7 +36,7 @@
36 36  
37 37 ## Options
38 38  
39   -- `--op`: operation to compute on the pair of samples. Should belong to `{'addition','multiplication','squared_addition','absolute_difference'}`
  39 +- `--op`: operation to compute on the pair of samples. Should belong to `{'addition','multiplication','squared_addition','absolute_difference'}`. The DPA book suggests `absolute_difference` as a good choice for second-order CPA attacks when the leakage follows the Hamming-weight model.
40 40 - `--window_size`: width of the sliding window
41 41 - `--min_dist`: minimum distance between two samples in a pair
42 42 - `--dtype`: `numpy` data type for the samples of the processed trace
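
Since `--window_size` and `--min_dist` together determine how much the traces grow, a rough sanity check helps: for a trace of n samples, the preprocessed trace has sum(n - d) samples, where d ranges over the distances from `min_dist` to `window_size - 1`. A hypothetical helper (not part of the repository) to compute this:

```python
# Hypothetical helper, not part of the repository: length of a preprocessed trace
def preprocessed_length(nb_samples, window_size, min_dist=1):
    # One pair (i, i + d) per start index i, for each allowed distance d
    return sum(nb_samples - d for d in range(min_dist, window_size))

print preprocessed_length(1000, 5)  # 3990, roughly (window_size - 1) times longer
```

With the values from the usage example above (window of 5, minimum distance of 1), a 1000-sample trace therefore grows to 3990 samples.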
preprocessing.py
... ... @@ -5,8 +5,7 @@
5 5 import itertools
6 6  
7 7 # Imports for parallel processing
8   -from multiprocessing import Pool
9   -from multiprocessing import current_process
  8 +from multiprocessing import Pool, current_process
10 9 import math
11 10 import copy
12 11  
... ... @@ -26,53 +25,62 @@
26 25 minimum_distance,
27 26 operation,
28 27 dtype,
  28 + start_index=0,
29 29 verbose=False,
30 30 first_chunk=True):
31 31  
32   - """Operates in a sliding window_size over the whole trace
33   - Computes x_i * x_j for all possible pair of samples (x_i, x_j)
34   - in the window_size with distance(x_i, x_j) > minimmum_distance.
  32 + """Operates in a sliding window_size over the trace
  33 + Computes f(x_i, x_j) for all possible pairs of samples (x_i, x_j)
  34 + in the window_size with distance(x_i, x_j) > minimum_distance.
35 35  
36 36 Keyword arguments:
37 37 traces_name: name of the file storing the traces
38 38 window_size: size of the window in which pairwise operation is done
39 39 minimum_distance: minimum distance between two samples processed
40 40 operation: processing operation to apply on the pair of samples
  41 + start_index: offset of this chunk in the full trace, added to the returned pair indexes
41 42 verbose: display INFO
42 43 first_chunk: indicates first chunk of data for parallel processing
  44 +
  45 + Returns:
  46 + preprocessed_trace: numpy array containing the preprocessed trace
  47 + indexes: the indexes of the processed pairs
43 48 """
44 49  
45 50 if verbose:
46 51 log.basicConfig(format="%(levelname)s: %(message)s", level=log.DEBUG)
47   - log.info("Verbose output enabled")
  52 + if not current_process()._identity or current_process()._identity[0] == 1:
  53 + log.info("Verbose output enabled")
48 54 else:
49 55 log.basicConfig(format="%(levelname)s: %(message)s")
50 56  
51 57 nb_traces, nb_samples = np.shape(traces)
52   - log.info("Input: {0} traces of {1} samples".format(nb_traces, nb_samples))
53   -
54   - # Report file size increase
55   - if first_chunk:
56   - processed_trace_length = len(list(itertools.combinations(range(window_size), 2))) + (nb_samples-window_size)*(window_size-1)
57   - print processed_trace_length
58   - else:
59   - processed_trace_length = len(list(itertools.combinations(range(window_size), 2))) + (nb_samples-window_size)*(window_size-1)
60   - # Allocate memory for the processed trace
61   - processed_trace = np.zeros((processed_trace_length, nb_traces), dtype=dtype)
  58 + if not current_process()._identity:
  59 + log.info("Processing {0} traces of {1} samples".format(nb_traces, nb_samples))
  60 +
  61 + preprocessed_trace_length = 0
  62 + for current_distance in xrange(minimum_distance, window_size):
  63 + for current_start_index in xrange(nb_samples - current_distance):
  64 + if first_chunk or (not first_chunk and current_start_index+current_distance>=window_size-1):
  65 + preprocessed_trace_length+=1
  66 + # Allocate memory for the preprocessed trace
  67 + preprocessed_trace = np.zeros((preprocessed_trace_length, nb_traces), dtype=dtype)
62 68 current_index = 0
  69 + indexes = np.zeros((preprocessed_trace_length),dtype='i,i')
63 70 # For all possible start indices in the window
64   - for current_distance in range(minimum_distance, window_size):
65   - for current_start_index in range(nb_samples - current_distance):
  71 + for current_distance in xrange(minimum_distance, window_size):
  72 + for current_start_index in xrange(nb_samples - current_distance):
66 73 if first_chunk or (not first_chunk and current_start_index+current_distance>=window_size-1):
67 74 value = np.array(operation(traces[:,current_start_index], traces[:,current_start_index+current_distance]), ndmin=2)
68 75 # Store the resulting vector
69   - processed_trace[current_index,:] = np.transpose(value)[:,0]
  76 + preprocessed_trace[current_index,:] = np.transpose(value)[:,0]
  77 + indexes[current_index] = (start_index+current_start_index, start_index+current_start_index+current_distance)
70 78 # Increase the running index
71 79 current_index+=1
72   - # print "Number of columns to remove", len(processed_trace[np.all(processed_trace == 0, axis=1)])
73   - processed_trace = processed_trace[~np.all(processed_trace == 0, axis=1)]
74   - return np.transpose(processed_trace)
  80 + preprocessed_trace = np.transpose(preprocessed_trace)
  81 + return preprocessed_trace, indexes
75 82  
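
To make the new return value concrete, here is a minimal usage sketch (toy data, not part of the commit), assuming the module's imports and the `multiplication` helper defined just below:

```python
# Sketch only: preprocess 3 toy traces of 6 samples each
traces = np.random.rand(3, 6)
preprocessed, indexes = pairwise_operation(traces, window_size=3,
                                           minimum_distance=1,
                                           operation=multiplication,
                                           dtype=np.float64)
print np.shape(preprocessed)  # (3, 9): one column per pair (i, i + d), d in {1, 2}
print indexes[0], indexes[1]  # (0, 1) (1, 2): pairs stored as 'i,i' structured records
```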
  83 +# Operations to perform on the pairs of samples
76 84 def multiplication(*args):
77 85 return args[0] * args[1]
78 86 def addition(*args):
... ... @@ -83,7 +91,8 @@
83 91 return abs(args[0] - args[1])
84 92  
85 93 if __name__ == "__main__":
86   -
  94 +
  95 + # Parsing arguments
87 96 parser = argparse.ArgumentParser(description='Preprocess traces')
88 97 parser.add_argument("traces_name", type=str)
89 98 parser.add_argument("--op", type=str, choices=['addition', 'multiplication', 'squared_addition', 'absolute_difference'])
... ... @@ -93,51 +102,61 @@
93 102 parser.add_argument("--ncores", type=int)
94 103 parser.add_argument('-v', '--verbose', action='store_true')
95 104 args = parser.parse_args()
  105 +
96 106 if args.op == 'multiplication': operation = multiplication
97 107 elif args.op == 'addition': operation = addition
98 108 elif args.op == 'squared_addition': operation = squared_addition
99 109 elif args.op == 'absolute_difference': operation = absolute_difference
100 110 dtype = np.dtype(args.dtype).type
  111 + if args.verbose:
  112 + log.basicConfig(format="%(levelname)s: %(message)s", level=log.DEBUG)
  113 + if not current_process()._identity or current_process()._identity[0] == 1:
  114 + log.info("Verbose output enabled")
  115 + else:
  116 + log.basicConfig(format="%(levelname)s: %(message)s")
101 117  
102   - data_set_width = 1000
103   - data_set_height = 1
104   - test_array = np.array([range(i, i+data_set_width) for i in range(data_set_height)])
105   - # print test_array
  118 +
  119 + # Generate fake data for testing purposes
  120 + data_set_width = 2000
  121 + data_set_height = 1000
  122 + # test_array = np.array([xrange(i, i+data_set_width-data_set_height) for i in xrange(data_set_height)])
  123 + test_array = np.random.rand(data_set_height, data_set_width)
106 124 traces = test_array
  125 + # Load traces from file
107 126 # traces = np.load(args.traces_name)
  127 +
  128 + # Shorten the traces to split them into equally-sized chunks
108 129 shortened = 0
109 130 while int(np.shape(traces)[1] + (args.ncores - 1)*(args.window_size - 1))%args.ncores != 0:
110 131 traces = traces[:,:-1].copy()
111 132 shortened+=1
112 133 if shortened:
113 134 log.warning("Traces shortened by {0} samples to split them into equally-sized chunks".format(shortened))
114   - # print traces
115   - t0 = time.time()
116   - processed_traces = pairwise_operation(traces, args.window_size, args.min_dist, operation, dtype, args.verbose)
117   - for i in processed_traces:
118   - i.sort()
119   - print "###\nNORMAL"
120   - print np.shape(processed_traces)
121   - print round(time.time()-t0, 2), "s"
122   - t0 = time.time()
123   - print "###"
  135 + nb_samples = np.shape(traces)[1]
124 136  
  137 + # Perform non-parallel preprocessing
  138 + preprocessed_traces, indexes = pairwise_operation(traces, args.window_size, args.min_dist, operation, dtype, verbose=args.verbose)
  139 +
  140 + # Init pool of workers for parallel preprocessing
125 141 pool = Pool(args.ncores)
  142 + # Compute the size of each chunk of traces to be preprocessed
126 143 chunk_size = int(np.shape(traces)[1]+(args.ncores-1)*(args.window_size-1))/args.ncores
127   - print "Traces split into chunks of {0} samples".format(chunk_size)
  144 + log.info("Traces split into {0} chunks of {1} samples".format(args.ncores, chunk_size))
  145 + # Split the traces, with overlapping
128 146 traces = view_as_windows(traces, (np.shape(traces)[0],chunk_size), step=chunk_size-args.window_size+1)[0]
129   - # print traces
130   - arguments = [(trace_set, args.window_size, args.min_dist, operation, dtype, args.verbose, first_chunk) for (trace_set, first_chunk) in zip(traces, [True]+(args.ncores-1)*[False])]
131   - processed_traces_parallel = np.concatenate(pool.map(multi_run_wrapper, arguments), axis=1)
132   - for i in processed_traces_parallel:
133   - i.sort()
134   - print "###\nPARALLEL"
135   - print np.shape(processed_traces_parallel)
136   - print round(time.time()-t0, 2), "s"
137   - t0 = time.time()
138   - print "###"
139   - if np.all(processed_traces==processed_traces_parallel):
140   - print "SURPRISINGLY, IT ACTUALLY WORKED !!!"
141   - # pairwise_operation(traces, args.window_size, args.min_dist, operation, dtype, args.verbose)
142   - # np.save("preprocessed_masked_traces.npy", pairwise_operation("masked_traces.npy", 3, 1, multiplication, np.float64, verbose=True))
  147 + # Create the list of arguments
  148 + start_indexes = xrange(0, nb_samples, chunk_size-args.window_size+1)
  149 + arguments = [(trace_set, args.window_size, args.min_dist, operation, dtype, start_index, args.verbose, first_chunk) for (trace_set, start_index, first_chunk) in zip(traces, start_indexes, [True]+(args.ncores-1)*[False])]
  150 + # Run the parallel computation
  151 + parallel_processing_results = pool.map(multi_run_wrapper, arguments)
  152 + # Each worker returns a (preprocessed_trace, indexes) tuple
  153 + preprocessed_traces_parallel = np.concatenate([result[0] for result in parallel_processing_results], axis=1)
  154 + # Pair indexes are 1-D arrays, concatenated end to end
  155 + indexes_parallel = np.concatenate([result[1] for result in parallel_processing_results])
  156 +
  157 + # Compare normal and parallel processing
  158 + if np.all(np.sort(preprocessed_traces) == np.sort(preprocessed_traces_parallel)):
  159 + if np.all(np.sort(indexes) == np.sort(indexes_parallel)):
  160 + print "###\nGreat, sequential and\nparallel processing\nreturned the same result\n###"
  161 + np.save("preprocessed_masked_traces.npy", preprocessed_traces_parallel)
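
Two details are worth noting for readers following the parallel path. First, `multi_run_wrapper` is referenced but not defined in this diff; it presumably lives elsewhere in `preprocessing.py` as the usual tuple-unpacking wrapper for `pool.map`. Second, consecutive chunks deliberately overlap by `window_size - 1` samples so that no pair spanning a chunk boundary is lost. A minimal sketch with hypothetical numbers:

```python
# Sketch only: the actual multi_run_wrapper is defined elsewhere in preprocessing.py.
# pool.map passes a single argument, so the wrapper unpacks the tuple of arguments.
def multi_run_wrapper(arguments):
    return pairwise_operation(*arguments)

# Overlap arithmetic with hypothetical values: 2000 samples, 4 cores, window of 5
nb_samples, ncores, window_size = 2000, 4, 5
chunk_size = (nb_samples + (ncores - 1) * (window_size - 1)) / ncores  # 503
step = chunk_size - window_size + 1                                    # 499
# zip() in the script truncates the start indexes to ncores chunks
print [start for start in xrange(0, nb_samples, step)][:ncores]  # [0, 499, 998, 1497]
```

Consecutive chunks thus share `chunk_size - step = 4 = window_size - 1` samples, which is exactly the overlap that `view_as_windows` produces with `step=chunk_size-args.window_size+1`.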