[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
1 # This file implements a Perl class for dealing with 2 # comma-separated-value (CSV) spreadsheets, such as those saved from 3 # OpenOffice or Excel. 4 # 5 # The file is assumed to look something like this: 6 # 7 # "Asset Tag","Owner","Description","Key" 8 # "x1000",1348,"Microsoft Windows XP","12345-ABCDE-FGHIJ-KLMNO-PQRST" 9 # "x1001",1348,"Microsoft Office XP","12345-ABCDE-FGHIJ-KLMNO-PQRST" 10 # "x1002",1348,"Adobe Acrobat 5.0","KWW505R7104502-158" 11 # 12 # That is, each line consists of items separated by commas. Items may 13 # be enclosed in double-quotes, and must be so enclosed if they 14 # include any funny characters (like commas). 15 # 16 # Conceptually, the spreadsheet is a collection of records mapping 17 # field names to field values. The first line of the file contains 18 # the names of the fields, while each other line provides the values 19 # of the fields for one record. 20 21 use strict; 22 23 package CSV; 24 use Carp; 25 26 # Parse a single line from a CSV file and return an array of its 27 # items. Unlikely to be called externally. 28 sub parse_line ($) { 29 my ($line) = @_; 30 31 my @ret; 32 33 # Clobber line ending, if any. 34 chomp $line; 35 $line =~ s/\r\z//; 36 37 # Hack to make every field start with comma delimiter 38 $line = ",$line"; 39 40 while ($line ne '') { 41 if ($line =~ /^,\"((?:\"\"|[^\"])*)\"((?:,|\z).*)/) { 42 my ($val, $rest) = ($1, $2); 43 # Replace double-quote with single-quote 44 $val =~ s/\"\"/\"/g; 45 push @ret, $val; 46 $line = $rest; 47 } 48 elsif ($line =~ /^,([^,]*)((?:,|\z).*)/) { 49 push @ret, $1; 50 $line = $2; 51 } 52 else { 53 die "Unparsable line:\n$line"; 54 } 55 } 56 57 return @ret; 58 } 59 60 # Class method. Read a CSV file and return an object representing it. 61 # Specifically, return a reference to an array of records, where each 62 # record is a hash mapping field names (from the first line of the 63 # file) to field values. 64 sub read_file ($$) { 65 my ($class, $filename) = @_; 66 67 my $ret = [ ]; 68 69 open CSV_FILE, $filename 70 or croak "Unable to open $filename for reading: $^E"; 71 72 # Read first line to get field names. 73 my $first_line = <CSV_FILE>; 74 my @field_names = parse_line ($first_line); 75 76 # Check for duplicate field names. 77 my %names; 78 foreach my $name (@field_names) { 79 (exists $names{$name}) 80 and die "Duplicate field name \"$name\" in $filename"; 81 $names{$name} = undef; 82 } 83 84 my $num_fields = scalar @field_names; 85 while (my $line = <CSV_FILE>) { 86 next if $line =~ /^\s*$/; 87 my @fields = parse_line ($line); 88 $num_fields == scalar @fields 89 or die "Wrong number of items (expected $num_fields):\n$line\n "; 90 my $record = { }; 91 foreach my $i (0 .. $num_fields - 1) { 92 my $field_name = $field_names[$i]; 93 $record->{$field_name} = $fields[$i]; 94 } 95 push @$ret, $record; 96 } 97 98 close CSV_FILE 99 or croak "Unable to close $filename: $^E"; 100 101 bless $ret, $class; 102 return $ret; 103 } 104 105 # Method. Index a CSV object by one of its fields. Take a field name 106 # as argument, and return a hash mapping from a value to an array of 107 # records having that value for the field. Optional argument 108 # "transform" is a procedure which modifies the value before placing 109 # it in the hash. 110 sub index_by ($;$) { 111 my ($self, $field_name, $transform) = @_; 112 113 my $ret = { }; 114 115 foreach my $record (@$self) { 116 (exists $record->{$field_name}) 117 or croak "No such field $field_name; bailing"; 118 my $value = $record->{$field_name}; 119 defined $transform 120 and $value = &$transform ($value); 121 122 push @{$ret->{$value}}, $record; 123 } 124 125 return $ret; 126 } 127 128 1;
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Tue Mar 17 22:47:18 2015 | Cross-referenced by PHPXref 0.7.1 |