Parsing large flat files with irregular delimiters
using System;
using System.Collections;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
namespace FileSplitter
{
class Program
{
    // Marker that terminates each record in the input file.
    private const string InputRowDelimiter = "~~";

    // Marker written after every record in the output files.
    private const string OutputRowDelimiter = "<><>";

    // Number of records that fills one output file.
    private const int MaxRowsPerFile = 1500;

    // Prefix of every generated output file name.
    private const string OutputFilePrefix = "Candidates";
    //private const string OutputFilePrefix = "Requisitions";

    // Header row written at the top of every output file; it already ends with the output row delimiter.
    private const string ColumnHeader = "candidatehrms_code|firstname|middlename|lastname|address1|address2|city|state|zip|countryhrms_code|email|login|password|isemployee|statushrms_code|resumertf|referraltypehrms_code|referraldetail|active|homephone<><>";
    //private const string ColumnHeader = "requisitionhrms_code|jobtitle|markettitle|orderdate|statusdate|createdate|internalexpires|jobcode|jobgradehrms_code|externaljobdescription|internaljobdescription|approved|statushrms_code|openings|placements|flsaexempt|fullpart|eligibleemployeereferral|wagecurrencyhrms_code|countryhrms_code|departmentshrms_code|locationhrms_code|hiringmanagerhrms_code|recruiterhrms_code|eeocodehrms_code|jobcategoryhrms_code|percenttravel|workflowhrms_code|webapproved|confidentialreq|supervisory|active|udfhirereqid|requisitiontypehrms_code|employmenttypehrms_code|incumbentname|aibtarget<><>";

    /// <summary>
    /// Splits the input file into output files of at most <see cref="MaxRowsPerFile"/> records each.
    /// Records end with "~~" in the input; every output file starts with the column header and
    /// has each record followed by "&lt;&gt;&lt;&gt;". Output files are created in the current directory.
    /// </summary>
    /// <param name="args">
    /// args[0] is the input file path. At least four arguments are required by the usage check,
    /// but args[1] through args[3] are not read: the delimiters and row limit are the constants above.
    /// </param>
    public static void Main(string[] args)
    {
        if (args.Length < 4)
        {
            Console.WriteLine("Usage: <path> <column delim> <row delim> <max rows>");
            return;
        }

        string inputPath = args[0];
        StringBuilder pending = new StringBuilder(); // text of the record currently being assembled
        StringBuilder batch = new StringBuilder();   // finished records waiting to be written
        int rowsInBatch = 0;

        using (StreamReader reader = new StreamReader(inputPath))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                // ReadLine drops the line terminator, so put one back when a record spans
                // several lines; otherwise multi-line field values get glued together.
                if (pending.Length > 0)
                {
                    pending.Append(Environment.NewLine);
                }
                pending.Append(line);

                // A line break is always inserted between lines, so a delimiter can only
                // occur inside the line just read. Skip the scan when it has none.
                if (line.IndexOf(InputRowDelimiter, StringComparison.Ordinal) < 0)
                {
                    continue;
                }

                string buffered = pending.ToString();
                int start = 0;
                int hit;

                // Extract every complete record, matching the delimiter as a whole string
                // so that a single '~' inside the data is left alone.
                while ((hit = buffered.IndexOf(InputRowDelimiter, start, StringComparison.Ordinal)) >= 0)
                {
                    batch.Append(buffered, start, hit - start).Append(OutputRowDelimiter);
                    rowsInBatch++;
                    start = hit + InputRowDelimiter.Length;

                    if (rowsInBatch == MaxRowsPerFile)
                    {
                        WriteBatch(batch);
                        rowsInBatch = 0;
                    }
                }

                // Keep whatever follows the last delimiter as the start of the next record.
                pending.Length = 0;
                pending.Append(buffered, start, buffered.Length - start);
            }
        }

        // Write the final, partially filled batch regardless of what the last line contained.
        if (rowsInBatch > 0)
        {
            WriteBatch(batch);
        }

        // Text after the last delimiter is not a complete record and is not written;
        // report it instead of dropping it silently.
        if (pending.ToString().Trim().Length > 0)
        {
            Console.Error.WriteLine("Warning: " + pending.Length + " trailing character(s) after the last row delimiter were not written.");
        }
    }

    // Writes the column header followed by the batched records to a new output file, then empties the batch.
    private static void WriteBatch(StringBuilder batch)
    {
        using (StreamWriter writer = File.CreateText(NextOutputPath()))
        {
            writer.Write(ColumnHeader);
            writer.Write(batch.ToString());
        }

        batch.Length = 0;
    }

    // Builds "<prefix>_yyyyMMdd_HHmmss.txt"; appends "_1", "_2", ... when that name is already
    // taken, so batches written within the same second do not overwrite each other.
    private static string NextOutputPath()
    {
        DateTime now = DateTime.Now;
        string stem = OutputFilePrefix + "_" + now.ToString("yyyyMMdd") + "_" + now.ToString("HHmmss");
        string path = stem + ".txt";
        int suffix = 1;

        while (File.Exists(path))
        {
            path = stem + "_" + suffix + ".txt";
            suffix++;
        }

        return path;
    }
}
}
using System.Collections;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
namespace FileSplitter
{
class Program
{
    // Marker that terminates each record in the input file.
    private const string InputRowDelimiter = "~~";

    // Marker written after every record in the output files.
    private const string OutputRowDelimiter = "<><>";

    // Number of records that fills one output file.
    private const int MaxRowsPerFile = 1500;

    // Prefix of every generated output file name.
    private const string OutputFilePrefix = "Candidates";
    //private const string OutputFilePrefix = "Requisitions";

    // Header row written at the top of every output file; it already ends with the output row delimiter.
    private const string ColumnHeader = "candidatehrms_code|firstname|middlename|lastname|address1|address2|city|state|zip|countryhrms_code|email|login|password|isemployee|statushrms_code|resumertf|referraltypehrms_code|referraldetail|active|homephone<><>";
    //private const string ColumnHeader = "requisitionhrms_code|jobtitle|markettitle|orderdate|statusdate|createdate|internalexpires|jobcode|jobgradehrms_code|externaljobdescription|internaljobdescription|approved|statushrms_code|openings|placements|flsaexempt|fullpart|eligibleemployeereferral|wagecurrencyhrms_code|countryhrms_code|departmentshrms_code|locationhrms_code|hiringmanagerhrms_code|recruiterhrms_code|eeocodehrms_code|jobcategoryhrms_code|percenttravel|workflowhrms_code|webapproved|confidentialreq|supervisory|active|udfhirereqid|requisitiontypehrms_code|employmenttypehrms_code|incumbentname|aibtarget<><>";

    /// <summary>
    /// Splits the input file into output files of at most <see cref="MaxRowsPerFile"/> records each.
    /// Records end with "~~" in the input; every output file starts with the column header and
    /// has each record followed by "&lt;&gt;&lt;&gt;". Output files are created in the current directory.
    /// </summary>
    /// <param name="args">
    /// args[0] is the input file path. At least four arguments are required by the usage check,
    /// but args[1] through args[3] are not read: the delimiters and row limit are the constants above.
    /// </param>
    public static void Main(string[] args)
    {
        if (args.Length < 4)
        {
            Console.WriteLine("Usage: <path> <column delim> <row delim> <max rows>");
            return;
        }

        string inputPath = args[0];
        StringBuilder pending = new StringBuilder(); // text of the record currently being assembled
        StringBuilder batch = new StringBuilder();   // finished records waiting to be written
        int rowsInBatch = 0;

        using (StreamReader reader = new StreamReader(inputPath))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                // ReadLine drops the line terminator, so put one back when a record spans
                // several lines; otherwise multi-line field values get glued together.
                if (pending.Length > 0)
                {
                    pending.Append(Environment.NewLine);
                }
                pending.Append(line);

                // A line break is always inserted between lines, so a delimiter can only
                // occur inside the line just read. Skip the scan when it has none.
                if (line.IndexOf(InputRowDelimiter, StringComparison.Ordinal) < 0)
                {
                    continue;
                }

                string buffered = pending.ToString();
                int start = 0;
                int hit;

                // Extract every complete record, matching the delimiter as a whole string
                // so that a single '~' inside the data is left alone.
                while ((hit = buffered.IndexOf(InputRowDelimiter, start, StringComparison.Ordinal)) >= 0)
                {
                    batch.Append(buffered, start, hit - start).Append(OutputRowDelimiter);
                    rowsInBatch++;
                    start = hit + InputRowDelimiter.Length;

                    if (rowsInBatch == MaxRowsPerFile)
                    {
                        WriteBatch(batch);
                        rowsInBatch = 0;
                    }
                }

                // Keep whatever follows the last delimiter as the start of the next record.
                pending.Length = 0;
                pending.Append(buffered, start, buffered.Length - start);
            }
        }

        // Write the final, partially filled batch regardless of what the last line contained.
        if (rowsInBatch > 0)
        {
            WriteBatch(batch);
        }

        // Text after the last delimiter is not a complete record and is not written;
        // report it instead of dropping it silently.
        if (pending.ToString().Trim().Length > 0)
        {
            Console.Error.WriteLine("Warning: " + pending.Length + " trailing character(s) after the last row delimiter were not written.");
        }
    }

    // Writes the column header followed by the batched records to a new output file, then empties the batch.
    private static void WriteBatch(StringBuilder batch)
    {
        using (StreamWriter writer = File.CreateText(NextOutputPath()))
        {
            writer.Write(ColumnHeader);
            writer.Write(batch.ToString());
        }

        batch.Length = 0;
    }

    // Builds "<prefix>_yyyyMMdd_HHmmss.txt"; appends "_1", "_2", ... when that name is already
    // taken, so batches written within the same second do not overwrite each other.
    private static string NextOutputPath()
    {
        DateTime now = DateTime.Now;
        string stem = OutputFilePrefix + "_" + now.ToString("yyyyMMdd") + "_" + now.ToString("HHmmss");
        string path = stem + ".txt";
        int suffix = 1;

        while (File.Exists(path))
        {
            path = stem + "_" + suffix + ".txt";
            suffix++;
        }

        return path;
    }
}
}
Now the only remaining issue is that it strips the \r and \n characters and replaces them with tabs, and I have no idea why.

There are no comments for this entry.
[Add Comment]