123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108 |
- #!/bin/sh
- # -*- scheme -*-
- exec guile -e main -s "$0" "$@"
- !#
- ;; Double-Anonymize the trust.csv by replacing keys with
- ;; index-numbers. This prevents results from evaluations of the trust
- ;; graph from being applied directly to correlation attacks on Freenet
- ;; users.
- ;; TODO: use vhashes instead of regular hash tables.
- (use-modules (ice-9 rdelim)
- (ice-9 i18n)
- (srfi srfi-69) ; hash tables
- (srfi srfi-1) ; first, second, third
- )
;; Add every one of ELEMENTS to the set TABLE.
;; A set is represented as a hash table whose keys are the members;
;; each member is stored with the value #t.
;; TABLE is mutated in place and also returned, so calls can be chained.
(define (set-add table . elements)
  (for-each (lambda (element)
              (hash-table-set! table element #t))
            elements)
  table)
;; Return the members of the set TABLE as a list.
;; This is the key list of the underlying hash table; no ordering is
;; applied here.
(define (set-keys table)
  (hash-table-keys table))
;; Return the number of members stored in the set TABLE.
(define (set-size table)
  (hash-table-size table))
;; Return the members of the set TABLE as a list sorted with string<?.
;; The members are compared with string<?, so they must all be strings.
(define (set->list-sorted table)
  (let ((members (set-keys table)))
    (sort-list members string<?)))
;; Create a new, empty set.
;; The set is a fresh hash table; members are added with set-add.
(define (make-set)
  (make-hash-table))
;; Read the remaining lines from PORT and return a sorted list of the
;; distinct ids that occur in them.
;; Each line is split on ';'; its first column (source) and second
;; column (target) are collected into a set. At end of input the set is
;; returned as a list sorted with string<?.
;; The header line is not skipped here; the caller must consume it first.
(define (get-ids port)
  (let ((seen (make-set)))
    (do ((line (read-line port) (read-line port)))
        ((eof-object? line)
         (set->list-sorted seen))
      (let ((fields (string-split line #\;)))
        (set-add seen (first fields) (second fields))))))
;; Build a lookup procedure that maps each id in IDS to its position.
;; IDS is a list; its elements are numbered 0, 1, 2, ... in list order.
;; If an id occurs more than once, the index of its last occurrence wins.
;; Returns a one-argument procedure taking an id and returning its index.
;; The lookup uses hash-table-ref without a fallback thunk, which in
;; SRFI-69 signals an error for an id that was not in IDS.
(define (index-ids-fun ids)
  (let ((id-to-index (make-hash-table)))
    (do ((remaining ids (cdr remaining))
         (position 0 (+ position 1)))
        ((null? remaining))
      (hash-table-set! id-to-index (car remaining) position))
    (lambda (id)
      (hash-table-ref id-to-index id))))
;; Consume the first line from PORT and verify that it is the expected
;; CSV header.
;; The line is lowercased with string-locale-downcase and must start
;; with "source;target"; further columns after that prefix are accepted.
;; Raises an error with a descriptive message when the header does not
;; match, or when PORT is already at end of input (empty file).
;; On success the port is left positioned at the first data line.
(define (check-csv-header port)
  (let ((header (read-line port))
        (required-header-lowercase "source;target"))
    (cond
     ;; An empty input yields the EOF object, which must not be passed
     ;; to the string procedures below.
     ((eof-object? header)
      (error (format #f "input file must have header '~A' (regardless of case) but is empty" required-header-lowercase)))
     ((not (string-prefix? required-header-lowercase (string-locale-downcase header)))
      (error (format #f "input file must have header '~A' (regardless of case) but has header '~A'" required-header-lowercase header))))))
;; First pass over the input: validate the CSV header on PORT, collect
;; the ids from the remaining lines, and return the id->index lookup
;; procedure built from them.
(define (index-ids-from-file port)
  (check-csv-header port)
  (index-ids-fun (get-ids port)))
-
;; Second pass over the input: copy the CSV from INPORT to OUTPORT with
;; the ids replaced by their index numbers.
;; ID->INDEX is a procedure mapping an id string to its index.
;; The header on INPORT is validated and consumed, then a fixed
;; "Source;Target;Weight" header is written. For every following line
;; the first two ';'-separated columns are translated through ID->INDEX
;; and the third column (weight) is copied unchanged.
;; Returns #t once end of input is reached.
(define (anonymize-ids id->index inport outport)
  (check-csv-header inport)
  (format outport "Source;Target;Weight\n")
  (do ((line (read-line inport) (read-line inport)))
      ((eof-object? line) #t)
    (let* ((fields (string-split line #\;))
           (source-index (id->index (first fields)))
           (target-index (id->index (second fields)))
           (weight (third fields)))
      (format outport "~A;~A;~A\n" source-index target-index weight))))
;; Script entry point.
;; ARGS is the command line as a list: the program name, then an
;; optional input file and an optional output file. The input defaults
;; to "trust-deduplicated.csv" and the output to "trust-anonymized.csv".
;; The input is read twice: once to build the id->index mapping and
;; once to write the anonymized copy.
;; Raises an error when the input and output paths are identical.
(define (main args)
  (let* ((user-args (cdr args))
         (infile (if (null? user-args)
                     "trust-deduplicated.csv"
                     (first user-args)))
         (outfile (if (or (null? user-args) (null? (cdr user-args)))
                      "trust-anonymized.csv"
                      (second user-args))))
    ;; Opening the output truncates it, so writing onto the input path
    ;; would destroy the data before the second pass can read it.
    (when (string=? infile outfile)
      (error (format #f "input and output file must differ, but both are '~A'" infile)))
    ;; let* forces the first pass to finish before the output file is
    ;; opened, so a missing or malformed input does not create or
    ;; truncate the output file.
    (let* ((id->index (call-with-input-file infile index-ids-from-file)))
      ;; call-with-input-file / call-with-output-file close their ports
      ;; when the procedure returns.
      (call-with-input-file infile
        (lambda (inport)
          (call-with-output-file outfile
            (lambda (outport)
              (anonymize-ids id->index inport outport))))))))
|