// Module Information
// Viewing contents of Public_Standards_CSV-0.1/module.pmod.in

/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 * Based on Format.CSV Pike module by
 * James Tyson, DogStar SOFTWARE .
 * Portions created by the Initial Developer are Copyright (C) 2005
 * the Initial Developer. All Rights Reserved.
 * Author(s):
 *   Bertrand LUPART 
 * ***** END LICENSE BLOCK ***** */

/* $Id$ */

// Module metadata: release version, author and the component path of this file.
string __version = "0.1";
string __author = "Bertrand LUPART ";
array __components = ({ "Public.pmod/Standards.pmod/CSV.pmod/module.pmod" });

// Initial value of FILE's do_type_detection; FILE.set_type_detection()
// documents 0 as "type detection unset".
static int default_type_detection = 0;

// Matches any string containing a comma, a double quote, a line feed or a
// carriage return. FILE.write_row() uses it to decide which fields to quote.
static object _enquote = Regexp("(,|\"|\n|\r)"); // Matches a string to be quoted

// Patterns tried in this order by detect_type().
// _int: one or more digits only (no sign).
static object _int = Regexp("^[0-9]+$"); // Matches an int
// _float: digits, a dot, then optional digits ("3." matches; ".5" and signed
// values do not).
static object _float = Regexp("^[0-9]+\\\.[0-9]*$"); // Matches a float
// _string: at least one character, optionally wrapped in double quotes.
static object _string = Regexp("^\"*.+\"*$"); // Matches a string

/* Common CSV functions */

//! Escape data to be put into a CSV field.
//! This means doubling every quoting character: " -> ""
//! The enclosing quotes are not added here; FILE.write_row() wraps the
//! result when the field needs them.
//! @param in
//! The string to quote
//!  Example: John "foo" Doe
//! @returns
//! The quoted string, ready to be written in a CSV file
//!  Example: John ""foo"" Doe
string enquote(string in)
{
	return replace(in, "\"", "\"\"");
}

//! Unescape data taken from a CSV field.
//! This means collapsing every doubled quoting character: "" -> "
//! Pairs are replaced from left to right; a lone quote is left untouched.
//! @param in
//! The string to dequote
//!  Example: John ""foo"" Doe
//! @returns
//! The string unquoted, ready to be processed
//!  Example: John "foo" Doe
string dequote(string in)
{
	return replace(in, "\"\"", "\"");
}

//! Determines the "human" type of a string.
//! In CSV, data are stored as strings, but the actual value can be of any type.
//! Example:
//!  "42" -> 42
//!  "3.14" -> 3.14
//! Only unsigned digit sequences are recognised as numbers; anything else
//! that is not empty is returned unchanged as a string.
//! @param v
//! The data we want to determine type
//! @returns
//! The actual value, casted as the determined type.
//! Non-string input is returned as is. The MySQL null marker \N yields the
//! zero_type sentinel. A string matching none of the patterns (such as the
//! empty string) yields "".
mixed detect_type(mixed v)
{
	// Types can only be determined on a string
	if (!stringp(v))
		return v;

	// MySQL's null
	if (v == "\\N")
	{
		// Can't think of a good way to suggest NULL that isn't just 0.
		// NOTE(review): this hands back the zero_type function itself, not a
		// zero value -- confirm callers expect that sentinel.
		return zero_type;
	}

	// Field is an int
	if (_int->match(v))
		return (int)v;

	// Field is a float
	if (_float->match(v))
		return (float)v;

	// Field is a string
	if (_string->match(v))
		return v;

	return "";
}

/* CSVIterator */

// This CSVIterator takes a Stdio.FILE as argument, reads data from it using
// Stdio.FILE()'s line_iterator and converts the CSV data on the fly.
// Since that's a generic Iterator, it could easily be updated to take anything
// as an argument for parsing CSV from it.
class CSVIterator
{
	static int csv_index = -1; // current CSV index, 0 for the first row
	static array csv_line = ({ }); // current CSV data, 0 once the input is exhausted

	static Stdio.FILE input_file; // The file containing the data
	static int file_remaining = 1; // Is there still some data in the file to read?

	// file_iterator reads the file a line at a time
	object file_iterator;

	/* Iterator API */

	//! @param _input
	//! The file containing the CSV data
	void create(Stdio.FILE _input)
	{
		input_file = _input;

		// Get the line_iterator from Stdio.FILE.
		// This allows to handle \n, \r\n, and \r files
		file_iterator = input_file->line_iterator(1);

		// Go to the next (first) item
		next();
	}

	// Do we have still some data in our iterator?
	// True once the input is exhausted.
	int `!()
	{
		// Nothing's left in the file, no more CSV data
		return !file_remaining;
	}

	// Get next elements from the iterator: advance by `steps` CSV rows.
	// A row may span several physical lines when a quoted field contains
	// line breaks.
	CSVIterator `+=(int steps)
	{
		for (int i = 0; i < steps; i++)
		{
			string in = file_iterator->value();

			// If no data from the file iterator
			if (!in)
			{
				file_remaining = 0; // there is no file remaining
				csv_line = 0; // current CSV line is empty
				return this; // exit
			}

			parse_csv(in); // parse csv and feed csv_line with them

			// Go to the next element
			csv_index++;
			file_iterator->next();
		}

		return this;
	}

	// The current index for the iterator
	int index()
	{
		return csv_index;
	}

	// Increment the iterator
	// Returns 1 when a row is available afterwards, 0 at end of input.
	int next()
	{
		`+=(1);
		return file_remaining;
	}

	// The CSV data for our current index
	int|array value()
	{
		return csv_line;
	}

	/* CSVIterator-specific methods */

	//! Turns one raw field into its value: the speechmarks surrounding the
	//! field, if any, are removed first, then doubled quotes are collapsed.
	//! Doing it in this order keeps a field made only of escaped quotes
	//! (e.g. """" meaning a single ") from losing its content.
	//! @param raw
	//!  The field text exactly as found between two separators
	static string clean_field(string raw)
	{
		if (sizeof(raw) >= 2 && raw[0] == '"' && raw[sizeof(raw)-1] == '"')
			raw = raw[1..sizeof(raw)-2];

		return dequote(raw);
	}

	//! Parses a string and tries to find some CSV data in it.
	//! The csv_line array is fed with the data.
	//! No heuristic is done yet to try to manage malformed CSV data.
	//! When a quoted field is still open at the end of the line, the following
	//! physical lines are pulled from file_iterator until the quote closes or
	//! the input ends.
	//! @param in
	//!  The line from the file we want to parse, as a string
	static void parse_csv(string in)
	{
		// We can't just divide the string on comma, since commas can be quoted
		int quoted = 0; // are we inside a quote sequence?
		int last = 0; // the last char we cared about when feeding result array
		int i = 0; // our current position in the line
		array result = ({ });

		for (i = 0; i < sizeof(in); i++)
		{
			switch (in[i..i])
			{
				// a " is found, reverse the quote status
				case "\"":
					quoted = !quoted;
					break;

				// a , is found
				case ",":
					// if we are not in a quote sequence, split the string
					if (!quoted)
					{
						result += ({ clean_field(in[last..(i-1)]) });
						last = i + 1;
					}
					break;
			}

			// If we're at the end of the line and quoted, we have a CRLF in a field
			// Adding a LF and go to the next line
			if (quoted && i == (sizeof(in)-1))
			{
				// FIXME: we're adding \n here, regardless the original data was \n, \r
				// or \r\n
				file_iterator->next();
				string more = file_iterator->value();

				// Unterminated quote at end of input: keep what we have rather
				// than looping forever on a missing line.
				if (!more)
					break;

				in += "\n" + more;
			}
		}

		// Adding the last part
		result += ({ clean_field(in[last..]) });

		csv_line = result;
	}
}


/* Public.Standards.CSV.FILE */

//! A Stdio.FILE that reads and writes CSV rows.
class FILE
{
	inherit Stdio.FILE;

	static int _standards = 1; // Do we want to be standards compliant for output?
	static int do_type_detection = default_type_detection;

	// csv_iterator reads a CSV line at a time
	// a CSV line can be split into multiple file lines
	object csv_iterator;

	//! If standards compliant, not all the fields will be enclosed in double
	//! quotes, only those containing double quotes, commas and newlines
	//! @param t
	//!  1 sets the file to be standards compliant
	//!  0 unsets it
	void set_standard_compliance(int t)
	{
		_standards = t;
	}

	//! If standards compliant, not all the fields will be enclosed in double
	//! quotes, only those containing double quotes, commas and newlines
	//! @returns
	//! 1 or 0 whether the file has been set standards compliant or not
	int get_standard_compliance()
	{
		return _standards;
	}

	//! Enable or disable the type detection.
	//! @param t
	//!  1 sets the file to detect types
	//!  0 unsets it
	void set_type_detection(int t)
	{
		do_type_detection = t;
	}

	//! Check if type detection is enabled or not.
	//! @returns
	//!  1 or 0 whether the file has been set to detect types or not
	int get_type_detection()
	{
		return do_type_detection;
	}

	//! Write a row
	//! Every value is cast to a string and its double quotes are doubled.
	//! In standards-compliant mode only the fields matched by _enquote are
	//! wrapped in double quotes; otherwise every field is wrapped.
	//! @param row
	//!	The data to write, either as separate arguments or as a single array
	//! @returns
	//! The number of bytes written
	int write_row(mixed... row)
	{
		// A single array argument is the row itself
		if (arrayp(row) && (sizeof(row) == 1) && arrayp(row[0]))
			row = row[0];

		array result = ({});
		foreach (row, mixed r)
		{
			string v = (string)r;

			if (_standards)
			{
				if (_enquote->match(v))
					result += ({ sprintf("\"%s\"", enquote(v)) });
				else
					result += ({ enquote(v) });
			}
			else
				result += ({ sprintf("\"%s\"", enquote(v)) });
		}

		return ::write((result * ",") + "\n");
	}

	//! Read a row
	//! @returns
	//! The row split into an array
	//! 0 if no data
	int|array read_row()
	{
		// We have to instantiate the CSVIterator the first time
		if (!csv_iterator)
			csv_iterator = CSVIterator(this_object());

		mixed res = csv_iterator->value();

		// Type detection has not made it into CSVIterator, because it was hell
		// to set/unset type detection on the fly this way
		if (do_type_detection && arrayp(res))
			foreach (res; mixed indice; mixed value)
				res[indice] = detect_type(value);

		// Move the iterators to the next line
		csv_iterator->next();

		return res;
	}

	// Lets foreach() walk the file one CSV row at a time.
	static object _get_iterator()
	{
		return CSVIterator(this_object());
	}

	// Same description as Stdio.FILE, with this class's name substituted.
	static string _sprintf(mixed... args)
	{
		return replace(::_sprintf(@args), "Stdio.FILE", "Public.Standards.CSV.FILE");
	}
}

