Modules
ADT Database GTK2 GUI IP PiJAX Public Sql Stdio Subversion System Tools Xosd lua v4l2 wx
Recent Changes
Public.Parser.XML2 1.50
Public.ZeroMQ 1.1
Public.Template.Mustache 1.0
Public.Protocols.XMPP 1.4
Sql.Provider.jdbc 1.0
Popular Downloads
Public.Parser.JSON2 1.0
Public.Parser.JSON 0.2
GTK2 2.23
Public.Web.FCGI 1.8
Public.Parser.XML2 1.48
|
Module Information
Public.Standards.CSV
Viewing contents of Public_Standards_CSV-0.1/module.pmod.in
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* Based on Format.CSV Pike module by
* James Tyson, DogStar SOFTWARE .
* Portions created by the Initial Developer are Copyright (C) 2005
* the Initial Developer. All Rights Reserved.
*
* Author(s):
* Bertrand LUPART
*
* ***** END LICENSE BLOCK ***** */
/* $Id$ */
// Module metadata.
// NOTE(review): presumably read by the module packaging tooling -- confirm.
string __version = "0.1";
string __author = "Bertrand LUPART ";
array __components = ({ "Public.pmod/Standards.pmod/CSV.pmod/module.pmod" });
// Initial value of the type detection flag of each FILE object (0 = disabled).
static int default_type_detection = 0;
// Regular expressions compiled once at module load and shared by the
// functions and classes below.
// A field containing a comma, a double quote, a LF or a CR must be quoted.
static object _enquote = Regexp("(,|\"|\n|\r)"); // Matches a string to be quoted
// Digits only: no sign is accepted.
static object _int = Regexp("^[0-9]+$"); // Matches an int
// At least one digit before the dot; the digits after the dot are optional.
static object _float = Regexp("^[0-9]+\\\.[0-9]*$"); // Matches a float
// Requires at least one character, so an empty string does not match.
static object _string = Regexp("^\"*.+\"*$"); // Matches a string
/* Common CSV functions */
//! Escape a value so that it can be stored in a CSV field.
//! Every double quote character is doubled: " -> ""
//! The surrounding quotes of the field are NOT added here.
//!
//! @param in
//!   The raw string
//!   Example: John "foo" Doe
//!
//! @returns
//!   The escaped string
//!   Example: John ""foo"" Doe
string enquote(string in)
{
  string escaped = replace(in, "\"", "\"\"");
  return escaped;
}
//! Unescape a value read from a CSV field.
//! Every doubled double quote is reduced to a single one: "" -> "
//! Quotes surrounding the field are not removed here.
//!
//! @param in
//!   The escaped string, as found in the file
//!   Example: John ""foo"" Doe
//!
//! @returns
//!   The unescaped string
//!   Example: John "foo" Doe
string dequote(string in)
{
  string unescaped = replace(in, "\"\"", "\"");
  return unescaped;
}
//! Converts a CSV field to the value a human would read in it.
//!
//! CSV stores everything as text; this function turns the text back into an
//! int or a float when it looks like one.
//!
//! Example:
//!   "42"   -> 42
//!   "3.14" -> 3.14
//!
//! Signed numbers are not recognised by the patterns and stay strings.
//!
//! @param v
//!   The field to inspect. Anything that is not a string is returned as is.
//!
//! @returns
//!   The value cast to the detected type, the string itself when it is
//!   neither an int nor a float, or "" when none of the patterns match.
mixed detect_type(mixed v)
{
  // Only strings carry a type to be detected
  if (stringp(v))
  {
    // MySQL's NULL marker.
    // NOTE(review): this hands back the zero_type identifier itself as the
    // NULL value, not 0 -- confirm that callers expect it.
    if (v == "\\N")
      return zero_type;

    if (_int->match(v))
      return (int)v;

    if (_float->match(v))
      return (float)v;

    // Any other non-empty text is kept verbatim
    return _string->match(v) ? v : "";
  }

  return v;
}
/* CSVIterator */
// This CSVIterator takes a Stdio.FILE as argument, reads data from it using
// Stdio.FILE()'s line_iterator and converts the CSV data on the fly.
// Since it is a generic Iterator, it could easily be updated to take anything
// as an argument for parsing CSV from it.
//! Iterator yielding one CSV record at a time, as an array of strings.
//!
//! The lines come from the line iterator of a Stdio.FILE. A record whose
//! quoted field contains a line break spans several file lines; these are
//! joined with "\n" before being split into fields.
class CSVIterator
{
  static int csv_index = -1;      // index of the current CSV record
  static array csv_line = ({ });  // fields of the current CSV record
  static Stdio.FILE input_file;   // the file containing the data
  static int file_remaining = 1;  // is there still some data to read?

  // file_iterator reads the file a line at a time
  object file_iterator;

  /* Iterator API */

  //! @param _input
  //!   The file containing the CSV data
  void create(Stdio.FILE _input)
  {
    input_file = _input;

    // Get the line_iterator from Stdio.FILE.
    // This allows to handle \n, \r\n, and \r files
    file_iterator = input_file->line_iterator(1);

    // Load the first record
    next();
  }

  //! True once the underlying file has no more data to offer.
  int `!()
  {
    return !file_remaining;
  }

  //! Advance the iterator.
  //!
  //! @param steps
  //!   The number of records to move forward
  //!
  //! @returns
  //!   This iterator
  CSVIterator `+=(int steps)
  {
    for (int i = 0; i < steps; i++)
    {
      string in = file_iterator->value();

      // No more line in the file: flag the end and drop the current record
      if (!in)
      {
        file_remaining = 0;
        csv_line = 0;
        return this;
      }

      // Split the line (and its continuation lines) into csv_line
      parse_csv(in);

      // Sanity pass over the collected fields
      foreach (csv_line; int pos; mixed v)
      {
        if (!sizeof(v))
        {
          csv_line[pos] = "";
          continue;
        }

        // The field is surrounded by double quotes: remove them
        if ((v[0] == '\"') && (v[sizeof(v) - 1] == '\"'))
          csv_line[pos] = v[1..sizeof(v) - 2];
      }

      // Move on to the next record
      csv_index++;
      file_iterator->next();
    }

    return this;
  }

  //! @returns
  //!   The index of the current record, starting at 0
  int index()
  {
    return csv_index;
  }

  //! Advance the iterator by one record.
  //!
  //! @returns
  //!   1 if a record is available after the move, 0 at the end of the data
  int next()
  {
    `+=(1);
    return file_remaining;
  }

  //! @returns
  //!   The fields of the current record, or 0 when there is none
  int|array value()
  {
    return csv_line;
  }

  /* CSVIterator-specific methods */

  //! Parses a line and stores the fields found into csv_line.
  //!
  //! When the line ends inside a quoted field, the following file lines are
  //! consumed from file_iterator and appended until the quote is closed.
  //!
  //! No heuristic is done yet to try to manage malformed CSV data.
  //!
  //! @param in
  //!   The line from the file we want to parse, as a string
  static void parse_csv(string in)
  {
    // We can't just divide the string on commas, since commas can be quoted
    int quoted = 0;  // are we inside a quote sequence?
    int last = 0;    // start of the field currently being scanned
    int i = 0;       // current position in the line
    array result = ({ });

    while (i < sizeof(in))
    {
      switch (in[i])
      {
        // A " toggles the quote status
        case '\"':
          quoted = !quoted;
          break;

        // A , outside of a quote sequence ends the current field
        case ',':
          if (!quoted)
          {
            result += ({ dequote(in[last..(i - 1)]) });
            last = i + 1;
          }
          break;
      }

      // End of the line reached inside a quote sequence: the field contains
      // a line break, so the record continues on the next file line
      if (quoted && i == (sizeof(in) - 1))
      {
        // FIXME: we're adding \n here, regardless the original data was \n,
        // \r or \r\n
        if (file_iterator->next())
          in += "\n" + file_iterator->value();
        else
        {
          // Unterminated quoted field at the end of the file: stop scanning.
          // Looping again without advancing would re-process the same
          // character and might never terminate.
          file_remaining = 0;
          break;
        }
      }

      i++;
    }

    // Adding the last part
    result += ({ dequote(in[last..]) });
    csv_line = result;
  }
}
/* Public.Standards.CSV.FILE */
//! A Stdio.FILE that reads and writes CSV rows.
class FILE
{
  inherit Stdio.FILE;

  static int _standards = 1;  // Do we want to be standards compliant for output?
  static int do_type_detection = default_type_detection;

  // csv_iterator reads a CSV record at a time.
  // A CSV record can be split over multiple file lines.
  object csv_iterator;

  //! If standards compliant, not all the fields will be enclosed in double
  //! quotes, only those containing double quotes, commas and newlines
  //!
  //! @param t
  //!   1 sets the file to be standards compliant
  //!   0 unsets it
  void set_standard_compliance(int t)
  {
    _standards = t;
  }

  //! If standards compliant, not all the fields will be enclosed in double
  //! quotes, only those containing double quotes, commas and newlines
  //!
  //! @returns
  //!   1 or 0 whether the file has been set standards compliant or not
  int get_standard_compliance()
  {
    return _standards;
  }

  //! Enable or disable the type detection.
  //!
  //! @param t
  //!   1 sets the file to detect types
  //!   0 unsets it
  void set_type_detection(int t)
  {
    do_type_detection = t;
  }

  //! Check if type detection is enabled or not.
  //!
  //! @returns
  //!   1 or 0 whether the file has been set to detect types or not
  int get_type_detection()
  {
    return do_type_detection;
  }

  //! Write a row
  //!
  //! @param row
  //!   The data to write, either as separate arguments or as one array.
  //!   Each value is cast to a string.
  //!
  //! @returns
  //!   The value returned by the inherited write()
  int write_row(mixed... row)
  {
    // A single array argument is the row itself
    if (arrayp(row) && (sizeof(row) == 1) && arrayp(row[0]))
      row = row[0];

    array result = ({});
    foreach (row, mixed r)
    {
      string v = (string)r;

      // In standards mode only the fields that need it are quoted;
      // otherwise every field is.
      if (_standards && !_enquote->match(v))
        result += ({ enquote(v) });
      else
        result += ({ sprintf("\"%s\"", enquote(v)) });
    }

    return ::write((result * ",") + "\n");
  }

  //! Read a row
  //!
  //! @returns
  //!   The row split into an array
  //!   0 if no data
  int|array read_row()
  {
    // The CSVIterator is instantiated on first use.
    // objectp() is used rather than !csv_iterator because CSVIterator
    // overloads `! to signal the end of the data.
    if (!objectp(csv_iterator))
      csv_iterator = CSVIterator(this_object());

    mixed res = csv_iterator->value();

    // Type detection has not made it into CSVIterator, because it was hell
    // to set/unset type detection on the fly this way.
    // value() is 0 once the data is exhausted: nothing to convert then.
    if (do_type_detection && arrayp(res))
    {
      foreach (res; int pos; mixed field)
        res[pos] = detect_type(field);
    }

    // Move the iterator to the next record
    csv_iterator->next();

    return res;
  }

  // Gives a fresh CSVIterator over this file
  static object _get_iterator()
  {
    return CSVIterator(this_object());
  }

  // Same description as Stdio.FILE, under this class' name
  static string _sprintf(mixed... args)
  {
    return replace(::_sprintf(@args), "Stdio.FILE", "Public.Standards.CSV.FILE");
  }
}
|
|
|