I have the following method to take an XLSX file and convert it to an XDocument:
public static XDocument ConvertXlsx2Xml(string fileName, string sheetName)
{
// Return the value of the specified cell.
const string documentRelationshipType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
const string worksheetSchema = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
const string sharedStringsRelationshipType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings";
const string sharedStringSchema = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
string cellValue = null;
var xsurvey = new XDocument(new XDeclaration("1.0", "UTF-8", "yes"));
var xroot = new XElement("Root"); //Create the root
using (Package xlPackage = Package.Open(fileName, FileMode.Open, FileAccess.Read))
{
PackagePart documentPart = null;
Uri documentUri = null;
// Get the main document part (workbook.xml).
foreach (System.IO.Packaging.PackageRelationship relationship in xlPackage.GetRelationshipsByType(documentRelationshipType))
{
// There should only be one document part in the package.
documentUri = PackUriHelper.ResolvePartUri(new Uri("/", UriKind.Relative), relationship.TargetUri);
documentPart = xlPackage.GetPart(documentUri);
// There should only be one instance, but get out no matter what.
break;
}
if (documentPart != null)
{
// Load the contents of the workbook.
var doc = new XmlDocument();
doc.Load(documentPart.GetStream());
/*
doc now contains the following important nodes:
<bookViews>
<workbookView xWindow="-15615" yWindow="2535" windowWidth="26835" windowHeight="13095" activeTab="2" />
<sheets>
<sheet name="Sheet1" sheetId="2" r:id="rId1" />
*/
// Create a namespace manager, so you can search.
// Add a prefix (d) for the default namespace.
var nt = new NameTable();
var nsManager = new XmlNamespaceManager(nt);
nsManager.AddNamespace("d", worksheetSchema);
nsManager.AddNamespace("s", sharedStringSchema);
//If value for sheetName isn't found, take the first sheet
string searchString = string.Format("//d:sheet[#name='{0}']", sheetName);
XmlNode sheetNode = doc.SelectSingleNode(searchString, nsManager) ??
doc.SelectSingleNode("//d:sheet", nsManager);
/*
* 11/15/12 DS Added to avoid pulling the data each time
* Create a dictionary of the shared strings from the associated string file
*/
#region Shared String Dictionary
var sharedStrings = new Dictionary<int, string>();
foreach (System.IO.Packaging.PackageRelationship stringRelationship in documentPart.GetRelationshipsByType(sharedStringsRelationshipType))
{
// There should only be one shared string reference, so you exit this loop immediately.
Uri sharedStringsUri = PackUriHelper.ResolvePartUri(documentUri, stringRelationship.TargetUri);
PackagePart stringPart = xlPackage.GetPart(sharedStringsUri);
{
// Load the contents of the shared strings.
var stringDoc = new XmlDocument(nt);
stringDoc.Load(stringPart.GetStream());
nsManager.AddNamespace("s", sharedStringSchema);
const string strSearch = "//s:sst";
XmlNode stringNode = stringDoc.SelectSingleNode(strSearch, nsManager);
int keyInt = 0;
if (stringNode != null)
foreach (XmlElement nd in stringNode)
{
//string test = nd.InnerText;
sharedStrings.Add(keyInt, nd.InnerText);
keyInt = keyInt + 1;
}
}
}
#endregion
var hrowList = new List<string>();
var hrowArray = new string[] {};
if (sheetNode != null && sheetNode.Attributes != null)
{
// Get the relId attribute:
XmlAttribute relationAttribute = sheetNode.Attributes["r:id"];
if (relationAttribute != null)
{
string relId = relationAttribute.Value;
// First, get the relation between the document and the sheet.
PackageRelationship sheetRelation = documentPart.GetRelationship(relId);
Uri sheetUri = PackUriHelper.ResolvePartUri(documentUri, sheetRelation.TargetUri);
PackagePart sheetPart = xlPackage.GetPart(sheetUri);
// Load the contents of the workbook.
var sheetDoc = new XmlDocument(nt);
sheetDoc.Load(sheetPart.GetStream());
/*
* sheetDoc now contains the following important nodes:
* <dimension ref="A1:V81" /> range of sheet data
* <sheetData>
* <row r="1" spans="1:22"> <row> r = row number, spans = columns containing the data
* <c r="A1" t="s"><v>0</v></c> <c> r = Cell address (A1,B4,etc), t = data type ("s"=string,"b"=bool, null=decimal)
* <v> contents are the index num if t="s", or value of t=null
*/
XmlNode sheetDataNode = sheetDoc.SelectSingleNode("//d:sheetData", nsManager);
int roNum = 0;
if (sheetDataNode != null)
{
var isSkip = false;
foreach (XmlElement row in sheetDataNode)
{
var xrow = new XElement("Row");
foreach (XmlElement cell in row)
{
XmlAttribute typeAttr = cell.Attributes["t"];
string cellType = typeAttr != null ? typeAttr.Value : string.Empty;
XmlNode valueNode = cell.SelectSingleNode("d:v", nsManager);
cellValue = valueNode != null ? valueNode.InnerText : cellValue;
// Check the cell type. At this point, this code only checks for booleans and strings individually.
switch (cellType)
{
case "b":
cellValue = cellValue == "1" ? "TRUE" : "FALSE";
break;
case "s":
cellValue = sharedStrings[Convert.ToInt32(cellValue)];
break;
}
if (cellValue == null) continue;
cellValue = cellValue.Replace("\r", "");
cellValue = cellValue.Replace("\n", " ");
cellValue = cellValue.Trim();
if (roNum == 0)
{
hrowList.Add(cellValue);
}
else
{
//XmlAttribute rowAttr = cell.Attributes["r"];
//int intStart = rowAttr.Value.IndexOfAny("0123456789".ToCharArray());
//colLet = rowAttr.Value.Substring(0, intStart);
//int colNum = NumberFromExcelColumn(colLet);
int colNum = GetColNum(cell);
/* 05/29/13 DS force column names to UPPER to remove case sensitivity */
var xvar = new XElement(hrowArray[colNum - 1].ToUpper());
xvar.SetValue(cellValue);
xrow.Add(xvar);
}
/* 6/18/2013 DS You must clear the cellValue so it is carried into the next cell value if it is empty. */
cellValue = "";
}
if (roNum == 0) hrowArray = hrowList.ToArray();
else xroot.Add(xrow);
roNum = roNum + 1;
}
}
}
}
}
}
xsurvey.Add(xroot);
return xsurvey;
}
For the most part, it works very well. However, I have just noticed that if one of the cell values contains a number like 0.004 it becomes 4.0000000000000001E-3.
The resulting XML gets imported and that value is loaded as a string, but before the final transfer to the production tables, this particular field is converted to numeric. That format doesn't work with numeric.
How do I prevent that change on load? If I can't, is there a better method to prevent a system error, other than specifically scrubbing that field and changing it back as part of the transfer process?
UPDATE
It is only numbers less than .01 that have a problem. 1, 1.004, and .04 are fine, but .004 is not.
UPDATE 2
If I format the cells as text BEFORE populating the data, I do not have this issue. There is something about how ManEx stores the data that prevents a clean upload.
Read value as double with invariant culture, and then convert it to decimal
var cellValueDouble = Convert.ToDouble(cellValue,System.Globalization.CultureInfo.InvariantCulture);
var cellValueDecimal = Convert.ToDecimal(cellValueDouble);
Related
I have difficulties understanding this example on how to use facets :
https://lucenenet.apache.org/docs/4.8.0-beta00008/api/Lucene.Net.Demo/Lucene.Net.Demo.Facet.SimpleFacetsExample.html
My goal is to create an index in which each document field have a facet, so that at search time i can choose which facets use to navigate data.
What i am confused about is setup of facets in index creation, to
summarize my question : is index with facets compatibile with
ReferenceManager?
Need DirectoryTaxonomyWriter to be actually written and persisted
on disk or it will embedded into the index itself and is just
temporary? I mean given the code
indexWriter.AddDocument(config.Build(taxoWriter, doc)); of the
example i expect it's temporary and will be embedded into the index (but then the example also show you need the Taxonomy to drill down facet). So can the Taxonomy be tangled in some way with the index so that the are handled althogeter with ReferenceManager?
If is not may i just use the same folder i use for storing index?
Here is a more detailed list of point that confuse me :
In my scenario i am indexing the document asyncrhonously (background process) and then fetching the indext ASAP throught ReferenceManager in ASP.NET application. I hope this way to fetch the index is compatibile with DirectoryTaxonomyWriter needed by facets.
Then i modified the code i write introducing the taxonomy writer as indicated in the example, but i am a bit confused, seems like i can't store DirectoryTaxonomyWriter into the same folder of index because the folder is locked, need i to persist it or it will be embedded into the index (so a RAMDirectory is enougth)? if i need to persist it in a different direcotry, can i safely persist it into subdirectory?
Here the code i am actually using :
private static void BuildIndex (IndexEntry entry)
{
string targetFolder = ConfigurationManager.AppSettings["IndexFolder"] ?? string.Empty;
//** LOG
if (System.IO.Directory.Exists(targetFolder) == false)
{
string message = #"Index folder not found";
_fileLogger.Error(message);
_consoleLogger.Error(message);
return;
}
var metadata = JsonConvert.DeserializeObject<IndexMetadata>(File.ReadAllText(entry.MetdataPath) ?? "{}");
string[] header = new string[0];
List<dynamic> csvRecords = new List<dynamic>();
using (var reader = new StreamReader(entry.DataPath))
{
CsvConfiguration csvConfiguration = new CsvConfiguration(CultureInfo.InvariantCulture);
csvConfiguration.AllowComments = false;
csvConfiguration.CountBytes = false;
csvConfiguration.Delimiter = ",";
csvConfiguration.DetectColumnCountChanges = false;
csvConfiguration.Encoding = Encoding.UTF8;
csvConfiguration.HasHeaderRecord = true;
csvConfiguration.IgnoreBlankLines = true;
csvConfiguration.HeaderValidated = null;
csvConfiguration.MissingFieldFound = null;
csvConfiguration.TrimOptions = CsvHelper.Configuration.TrimOptions.None;
csvConfiguration.BadDataFound = null;
using (var csvReader = new CsvReader(reader, csvConfiguration))
{
csvReader.Read();
csvReader.ReadHeader();
csvReader.Read();
header = csvReader.HeaderRecord;
csvRecords = csvReader.GetRecords<dynamic>().ToList();
}
}
string targetDirectory = Path.Combine(targetFolder, "Index__" + metadata.Boundle + "__" + DateTime.Now.ToString("yyyyMMdd_HHmmss") + "__" + Path.GetRandomFileName().Substring(0, 6));
System.IO.Directory.CreateDirectory(targetDirectory);
//** LOG
{
string message = #"..creating index : {0}";
_fileLogger.Information(message, targetDirectory);
_consoleLogger.Information(message, targetDirectory);
}
using (var dir = FSDirectory.Open(targetDirectory))
{
using (DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir))
{
Analyzer analyzer = metadata.GetAnalyzer();
var indexConfig = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
using (IndexWriter writer = new IndexWriter(dir, indexConfig))
{
long entryNumber = csvRecords.Count();
long index = 0;
long lastPercentage = 0;
foreach (dynamic csvEntry in csvRecords)
{
Document doc = new Document();
IDictionary<string, object> dynamicCsvEntry = (IDictionary<string, object>)csvEntry;
var indexedMetadataFiled = metadata.IdexedFields;
foreach (string headField in header)
{
if (indexedMetadataFiled.ContainsKey(headField) == false || (indexedMetadataFiled[headField].NeedToBeIndexed == false && indexedMetadataFiled[headField].NeedToBeStored == false))
continue;
var field = new Field(headField,
((string)dynamicCsvEntry[headField] ?? string.Empty).ToLower(),
indexedMetadataFiled[headField].NeedToBeStored ? Field.Store.YES : Field.Store.NO,
indexedMetadataFiled[headField].NeedToBeIndexed ? Field.Index.ANALYZED : Field.Index.NO
);
doc.Add(field);
var facetField = new FacetField(headField, (string)dynamicCsvEntry[headField]);
doc.Add(facetField);
}
long percentage = (long)(((decimal)index / (decimal)entryNumber) * 100m);
if (percentage > lastPercentage && percentage % 10 == 0)
{
_consoleLogger.Information($"..indexing {percentage}%..");
lastPercentage = percentage;
}
writer.AddDocument(doc);
index++;
}
writer.Commit();
}
}
}
//** LOG
{
string message = #"Index Created : {0}";
_fileLogger.Information(message, targetDirectory);
_consoleLogger.Information(message, targetDirectory);
}
}
I am trying to figure out how to read the results of the Quickbooks Online report service. Specifically I am trying to show the results from reportBS on a label and here is my code:
OAuth2RequestValidator oauthValidator = new OAuth2RequestValidator(dictionary["accessToken"]);
ServiceContext serviceContext = new ServiceContext(dictionary["realmId"], IntuitServicesType.QBO, oauthValidator);
serviceContext.IppConfiguration.BaseUrl.Qbo = "https://sandbox-quickbooks.api.intuit.com/";
serviceContext.IppConfiguration.MinorVersion.Qbo = "55";
ReportService reportService = new ReportService(serviceContext);
reportService.accounting_method = "Accrual";
reportService.start_date = "2020-01-01";
reportService.end_date = "2020-10-31";
reportService.summarize_column_by = "Month";
serviceContext.IppConfiguration.Message.Response.SerializationFormat = Intuit.Ipp.Core.Configuration.SerializationFormat.Json;
ReportService defaultReportService1 = new ReportService(serviceContext);
string defaultReportName = "BalanceSheet";
Report reportBS = defaultReportService1.ExecuteReport(defaultReportName);
The result I get when I run my code and place reportBS on a label is Intuit.Ipp.Data.Report with nothing else.
You need to save the QBO report as JSON or XML string? As far as I know, converting a QBO report object to JSON or XML needs to be done manually.
Here is a question that shows how it is done with a profit and loss report. (this is actually very similar to the text extraction method I outlined in the first place, only the result gets reassembled as XML/JSON in your case).
If you just want to pass the report to another system or store the report serialization could fit the bill.
Following this basic report example:
private static void PrintRows(StringBuilder reportText, Row[] rows, int[] maxColumnSize, int level)
{
for (int rowIndex = 0; rowIndex < rows.Length; rowIndex++)
{
Row row = rows[rowIndex];
//Get Row Header
Header rowHeader = GetRowProperty(row, ItemsChoiceType1.Header);
//Append Row Header
if (rowHeader != null && rowHeader.ColData != null) { PrintColData(reportText, rowHeader.ColData, maxColumnSize, level); }
//Get Row ColData
ColData[] colData = GetRowProperty(row, ItemsChoiceType1.ColData);
//Append ColData
if (colData != null) { PrintColData(reportText, colData, maxColumnSize, level); }
//Get Child Rows
Rows childRows = GetRowProperty(row, ItemsChoiceType1.Rows);
//Append Child Rows
if (childRows != null) { PrintRows(reportText, childRows.Row, maxColumnSize, level + 1); }
//Get Row Summary
Summary rowSummary = GetRowProperty(row, ItemsChoiceType1.Summary);
//Append Row Summary
if (rowSummary != null && rowSummary.ColData != null) { PrintColData(reportText, rowSummary.ColData, maxColumnSize, level); }
}
}
StringBuilder reportText = new StringBuilder();
//Determine Maxmimum Text Lengths to format Report
//int[] maximumColumnTextSize = GetMaximumColumnTextSize(report);
//Append Column Headers
//PrintColumnData(reportText, report.Columns, maximumColumnTextSize, 0);
PrintRows(reportText, report.Rows, maximumColumnTextSize, 1);
i have method which create xml:
var xml = new XmlDocument();
var head = xml.CreateElement("Head");
var uniqueIdentifier = xml.CreateElement("Unique_identifier");
var documentDate = xml.CreateElement("Document_date");
var documentNumber = xml.CreateElement("Document_number");
uniqueIdentifier.InnerText = "1";
documentDate.InnerText = "2019-01-01";
documentNumber.InnerText = "2";
xml.AppendChild(head);
head.AppendChild(uniqueIdentifier);
head.AppendChild(documentDate);
head.AppendChild(documentNumber);
InnerText Values i get from db and this fields can be null or empty. so, each variable needs checked for null and an empty string
like this
var uniqueIdentifier = xml.CreateElement("Unique_identifier");
var r = reader.GetFieldValue("Unique_identifier");
if (r is string)
{
if (!string.IsNullOrWhiteSpace(r))
{
uniqueIdentifier.InnerText = Text.Convert(r);
}
}
I get a lot of extra lines(5 variable and 10 check). How clean it?
I have parsed the xml document and used a for loop to loop for getting different values in string, but when I try to return the value I get only the last value obtained, I want to return all the individual values so that I can store that values in any file format,
Below is my code,
XmlDocument xmlDOC = new XmlDocument();
xmlDOC.LoadXml(periodID_Value_Before_OffSet); // string storing my XML
var value = xmlDOC.GetElementsByTagName("value");
var xmlActions = new string[value.Count];
string values = "";
string Period1 = "";
string periodlevel_period1 = "";
var listOfStrings = new List<string>();
string modified_listofstrings = listOfStrings.ToString();
string arrayOfStrings = "";
for (int i = 0; i < value.Count; i++)
{
var xmlAttributeCollection = value[i].Attributes;
if (xmlAttributeCollection != null)
{
var action = xmlAttributeCollection["periodid"];
xmlActions[i] = action.Value;
values += action.Value + ",";
string vals = values.Split(',')[1];
string counts = values;
string[] periods = counts.Split(',');
Period1 = periods[i];
// periodlevel_period1 = Client.GetAttributeAsString(sessionId, Period1, "name", "");
modified_listofstrings = Client.GetAttributeAsString(sessionId, Period1, "name", "");
modified_listofstrings.ToArray().ToString();
//listOfStrings = periodlevel_period1;
}
}
return modified_listofstrings;
This modified_listofstrings string only return last on value, I want to return the array of the values all obtained while looping.
----------Updated question----------
below is my Sample XMl
<string xmlns="http://tempuri.org/">
<ResultSetHierarchy totalResultsReturned="1" totalResults="1" firstIndex="0" maxCount="-1">
<object id="SC.1938773693.238">
<measure.values>
<series id="SC.1938773693.108280985">
<value periodid="SC.1938773693.394400760" value="17" />
<value periodid="SC.1938773693.1282504058" value="15" />
<value periodid="SC.1938773693.1631528570" value="13" />
</series>
</object>
</ResultSetHierarchy>
</string>
I want output as "SC.1938773693.394400760":"17" and so on for all periodid
Based on the provided information I have updated the answer.
List<string> items = new List<string>();
XmlDocument xmlDOC = new XmlDocument();
xmlDOC.Load(#"E:\Delete Me\ConsoleApplication1\ConsoleApplication1\bin\Debug\List.xml");
var elements = xmlDOC.GetElementsByTagName("value");
foreach (var item in elements)
{
XmlElement value = (XmlElement)item;
items.Add(string.Format("{0}:{1}", value.GetAttribute("periodid"), value.GetAttribute("value")));
}
It looks like you're trying to:
Load an XmlDocument
Get a list of all the attributes of name 'periodid'
Look each periodid up using a webservice call
Return a list of the lookup results
If that is correct, the following method should do what you need:
public List<string> GetListOfData()
{
XmlDocument xmlDOC = new XmlDocument();
xmlDOC.LoadXml("<Html><value periodid='Yabba'>YabbaValue</value><value periodid='Dabba'>DabbaValue</value><value periodid='Doo'>DooValue</value></Html>"); // string storing my XML
var value = xmlDOC.GetElementsByTagName("value");
var listOfStrings = new List<string>();
for (int i = 0; i < value.Count; i++)
{
var xmlAttributeCollection = value[i].Attributes;
if (xmlAttributeCollection != null)
{
var action = xmlAttributeCollection["periodid"];
string Period1 = action.Value;
listOfStrings.Add(QPR_webService_Client.GetAttributeAsString(sessionId, Period1, "name", "") + ":" + value[i].InnerText);
}
}
return listOfStrings;
}
I have fixed my issue with reading the XML files. What I am needing now is to trim the datetime down to just MM dd yyyy hh:mm:ss and not bring the rest of it over when I insert into my Informix database.
This is the XML info:
<RecordFilingRequestMessage xmlns:nc="http://niem.gov/niem/niem-core/2.0">
<nc:DocumentIdentification>
<nc:IdentificationID>3212842</nc:IdentificationID>
</nc:DocumentIdentification>
<nc:DocumentPostDate>
<nc:DateTime>2013-06-25T11:32:08.5343733-04:00</nc:DateTime>
</nc:DocumentPostDate>
<nc:DocumentSubmitter>
<ecf:EntityPerson s:id="REVIEWER">
<nc:PersonName />
<nc:PersonOtherIdentification>
<nc:IdentificationID>41130</nc:IdentificationID>
<nc:IdentificationCategoryText>FLEPORTAL</nc:IdentificationCategoryText>
</nc:PersonOtherIdentification>
<nc:PersonOtherIdentification>
<nc:IdentificationID>kacolburn</nc:IdentificationID>
<nc:IdentificationCategoryText>FLEPORTAL_LOGONNAME</nc:IdentificationCategoryText>
</nc:PersonOtherIdentification>
...and here is my C# code:
string DocID = null;
int elementCount = 0;
string reqID = null;
string reqDateTime = null;
string empName = null;
string[] fileEntries = Directory.GetFiles(#"C:\XML\3212842.xml");
foreach (string fileName in fileEntries)
{
XmlReader xr = XmlReader.Create(fileName); //reads XML from folder
while (xr.Read())
{
if (xr.NodeType == XmlNodeType.Element && xr.Name == "nc:DateTime")
{
reqDateTime = xr.ReadElementContentAsString();
}
if (xr.NodeType == XmlNodeType.Element && xr.Name == "nc:IdentificationID")
{
elementCount++;
DocID = xr.ReadElementContentAsString();
if (elementCount == 1)
{
reqID = DocID;
}
if (elementCount == 3)
{
empName = DocID;
listBox1.Items.Add(reqID + " / " + reqDateTime + " / " + empName);
elementCount = 0;
break;
}
My first thought is, that the last '/' doesn't belong to the 'SelectNodes' call.
Alternatively, this code will solve your problem:
foreach(XmlNode node in xmlDoc.GetElementsByTagName("RecordFilingRequest")[0].GetElementsByTagName("nc:DocumentIdentification"))
{
int ID = Convert.ToInt32(node.FirstChild().InnerText);
}
Edit: This does assume, that 'RecordFilingRequest' always exists. Add a try .. catch statement, if that isn't the case.
Looks like the issue is that the XML uses namespaces, and your XPath does not. You didn't post your full XML, but you probably have something like xmlns:nc="http://some.url/ in there. Make sure to include a the namespace in a namespace manager, then add namespace prefixes to your query:
var nameTable = new NameTable();
var nsMgr = new XmlNamespaceManager(nameTable);
nsmgr.AddNamespace("nc", "http://some.url/");
var dataNodes = xmlDoc.SelectNodes("nc:RecordFilingRequest/nc:DocumentIdentification", nsMgr);
foreach (var node in dataNodes)
{
var ID = Convert.ToInt32(node.SelectSingleNode("nc:IdentificationID", nsMgr).InnerText);
// insert into database, e.g. using SqlCommand or whatever
}