asp.net - How can I populate C# classes from an XML document that has some embedded data? -
I have an API that returns this:
http://services.aonaware.com/dictservice/dictservice.asmx?op=defineindict
<?xml version="1.0" encoding="utf-8"?> <worddefinition xmlns:xsi="http://www.w3.org/2001/xmlschema-instance" xmlns:xsd="http://www.w3.org/2001/xmlschema" xmlns="http://services.aonaware.com/webservices/"> <word>abandon</word> <definitions> <definition> <word>abandon</word> <dictionary> <id>wn</id> <name>wordnet (r) 2.0</name> </dictionary> <worddefinition>abandon n 1: trait of lacking restraint or control; freedom inhibition or worry; "she danced abandon" [syn: {wantonness}, {unconstraint}] 2: feeling of extreme emotional intensity; "the wildness of anger" [syn: {wildness}] v 1: forsake, leave behind; "we abandoned old car in empty parking lot" 2: stop maintaining or insisting on; of ideas, claims, etc.; "he abandoned thought of asking hand in marriage"; "both sides have give calims in these negociations" [syn: {give up}] 3: give intent of never claiming again; "abandon life god"; "she gave children ex-husband when moved tahiti"; "we gave drowning victim dead" [syn: {give up}] 4: leave behind empty; move out of; "you must vacate office tonight" [syn: {vacate}, {empty}] 5: leave needs or counts on you; leave in lurch; "the mother deserted children" [syn: {forsake}, {desolate}, {desert}] </worddefinition> </definition> </definitions> </worddefinition>
Here is the code I used to retrieve the XML data:
// Sends a form-encoded POST to the dictionary service and prints the status
// description followed by the raw XML response.
WebRequest request = WebRequest.Create("http://services.aonaware.com/dictservice/dictservice.asmx/defineindict");
// HTTP method tokens are case-sensitive, so the verb is sent in upper case.
request.Method = "POST";
string postdata = "dictid=wn&word=abandon";
byte[] bytearray = Encoding.UTF8.GetBytes(postdata);
request.ContentType = "application/x-www-form-urlencoded";
request.ContentLength = bytearray.Length;

// Write the request body; `using` closes the stream even if Write throws.
using (Stream requeststream = request.GetRequestStream())
{
    requeststream.Write(bytearray, 0, bytearray.Length);
}

// Declared outside the using block so the response text stays available
// to any code that follows this snippet.
string responsefromserver;
using (WebResponse response = request.GetResponse())
using (Stream responsestream = response.GetResponseStream())
using (StreamReader reader = new StreamReader(responsestream))
{
    Console.WriteLine(((HttpWebResponse)response).StatusDescription);
    responsefromserver = reader.ReadToEnd();
}
Console.WriteLine(responsefromserver);
I want to extract the data from the XML into a list of `definition` objects, where the classes look like this:
/// <summary>
/// A single sense of a word: its descriptive text and the synonyms listed for it.
/// </summary>
public class def
{
    public def()
    {
        // Start with an empty list so callers can add or enumerate without a null check.
        synonym = new List<string>();
    }

    /// <summary>The descriptive text of this sense.</summary>
    public string text { get; set; }

    /// <summary>Synonyms listed for this sense; empty when there are none.</summary>
    public List<string> synonym { get; set; }
}

/// <summary>
/// All senses of a word that share one word type.
/// </summary>
public class definition
{
    public definition()
    {
        // Start with an empty list so callers can add or enumerate without a null check.
        this.def = new List<def>();
    }

    /// <summary>Word-type marker, e.g. n, v or a.</summary>
    public string type { get; set; }

    /// <summary>The senses that belong to this word type.</summary>
    public List<def> def { get; set; }
}
Can anyone give me advice on how I can do this, and show me the options available to pick the class elements out of the XML and put them into these classes?
As I think this question will be helpful to many other people, I'll open a large bounty so that someone can take the time to come up with a good example.
update:
Sorry, I made a mistake with the synonym property and have changed it now; I hope it makes more sense. The synonyms are a list, and I have put what I need in bold, as the two answers so far don't seem to answer the question at all. Thank you.
I created a simple parser for the word definition (I'm pretty sure there's room for improvement here):
solution 1.0
/// <summary>
/// Parses the free-text body of a worddefinition element into a list of
/// <c>definition</c> objects, one per word-type section (adjective, noun, verb).
/// </summary>
class parseymcparseface
{
    // A line that opens a new word-type section: one type letter, a space and a sense number.
    private static readonly Regex TypeHeaderPattern = new Regex("^[avn]{1}\\ \\d+");

    // A line that opens a new numbered sense inside the current section.
    private static readonly Regex SenseHeaderPattern = new Regex("^\\d+");

    // The sense text: everything after ": " up to an optional opening '['.
    private static readonly Regex DefinitionTextPattern = new Regex("(?:\\:\\ )(\\w|\\ |;|\"|,|\\.|-)*[\\[]{0,1}");

    // One synonym written in braces, e.g. {give up}.
    private static readonly Regex SynonymPattern = new Regex("\\{(\\w|\\ )+\\}");

    /// <summary>
    /// Word definition lines, without the first line.
    /// </summary>
    private readonly string[] _text;

    /// <summary>
    /// Splits the inner text of the worddefinition tag into lines.
    /// </summary>
    /// <param name="text">Inner text of the worddefinition element.</param>
    /// <exception cref="ArgumentNullException">When <paramref name="text"/> is null.</exception>
    public parseymcparseface(string text)
    {
        if (text == null)
        {
            throw new ArgumentNullException("text");
        }

        _text = text.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries)
                    .Skip(1) // the first line only repeats the word itself
                    .ToArray();
    }

    /// <summary>
    /// Converts the single-letter type marker to a human-readable type name.
    /// </summary>
    /// <param name="c">Type letter taken from the start of a section.</param>
    /// <returns>"adjective", "noun", "verb", or "unknown" for any other letter.</returns>
    private static string chartotype(char c)
    {
        switch (c)
        {
            case 'a': return "adjective";
            case 'n': return "noun";
            case 'v': return "verb";
            default: return "unknown";
        }
    }

    /// <summary>
    /// Groups the raw lines into one list per word type, where each element of a
    /// list is one complete sense (wrapped lines are joined with a single space).
    /// </summary>
    /// <param name="text">Lines of text.</param>
    /// <returns>One list of sense strings per word-type section, in input order.</returns>
    private static List<List<string>> makelists(IEnumerable<string> text)
    {
        var types = new List<List<string>>();
        List<string> current = null;

        foreach (var rawLine in text)
        {
            string line = rawLine.Trim();

            if (TypeHeaderPattern.IsMatch(line))
            {
                // New type (noun, verb, adj.).
                current = new List<string> { line };
                types.Add(current);
            }
            else if (current == null)
            {
                // Text before the first type header belongs to no section; skip it
                // instead of indexing the (still empty) list of sections.
                continue;
            }
            else if (SenseHeaderPattern.IsMatch(line))
            {
                // New sense in the current section.
                current.Add(line);
            }
            else
            {
                // Continuation of the most recent sense.
                current[current.Count - 1] += " " + line;
            }
        }

        return types;
    }

    /// <summary>
    /// Builds the definition objects from the lines given to the constructor.
    /// </summary>
    /// <returns>One <c>definition</c> per word-type section, each holding its senses.</returns>
    public List<definition> parse()
    {
        var definitions = new List<definition>();

        foreach (var senseGroup in makelists(_text))
        {
            var defs = new List<def>();

            foreach (var entry in senseGroup)
            {
                string trimmed = entry.Trim();
                Match textMatch = DefinitionTextPattern.Match(trimmed);

                var synonyms = new List<string>();
                foreach (Match syn in SynonymPattern.Matches(trimmed))
                {
                    synonyms.Add(syn.Value.Trim('{', '}'));
                }

                defs.Add(new def
                {
                    // Strip the leading ": " and the optional trailing '[' captured by the pattern.
                    text = textMatch.Value.Trim(':', '[', ' '),
                    synonym = synonyms
                });
            }

            // The first character of the first line in a section is its type letter.
            definitions.Add(new definition { type = chartotype(senseGroup[0][0]), def = defs });
        }

        return definitions;
    }
}
And here's a usage example:
// Usage example: fetch the definition XML from the dictionary service and
// hand the inner text of the nested worddefinition element to the parser.
WebRequest request = WebRequest.Create("http://services.aonaware.com/dictservice/dictservice.asmx/defineindict");
// HTTP method tokens are case-sensitive, so the verb is sent in upper case.
request.Method = "POST";
string postdata = "dictid=wn&word=abandon";
byte[] bytearray = Encoding.UTF8.GetBytes(postdata);
request.ContentType = "application/x-www-form-urlencoded";
request.ContentLength = bytearray.Length;

// Write the request body; `using` closes the stream even if Write throws.
using (Stream requeststream = request.GetRequestStream())
{
    requeststream.Write(bytearray, 0, bytearray.Length);
}

// Read the whole response, then release the reader, stream and response.
string responsefromserver;
using (WebResponse response = request.GetResponse())
using (Stream responsestream = response.GetResponseStream())
using (StreamReader reader = new StreamReader(responsestream))
{
    Console.WriteLine(((HttpWebResponse)response).StatusDescription);
    responsefromserver = reader.ReadToEnd();
}

var doc = new XmlDocument();
doc.LoadXml(responsefromserver);

// NOTE(review): element-name lookup is case-sensitive; confirm the tag name
// matches the casing the service actually returns.
var el = doc.GetElementsByTagName("worddefinition");

// Index 0 is the document root; index 1 is the nested element holding the
// free-text definition. Fail with a clear message when it is missing.
if (el.Count < 2)
{
    throw new InvalidOperationException("The response does not contain a nested worddefinition element.");
}

parseymcparseface parseymcparseface = new parseymcparseface(el[1].InnerText);
var parsingresult = parseymcparseface.parse();
// parsingresult contains the list of definitions
// in the format specified in the question.
And here's a live demo: https://dotnetfiddle.net/24iq67
You can avoid manually retrieving and parsing the XML by adding a reference to the web service.
solution 2.0
I've made a little app that parses the definition. It is hosted here on GitHub (it's too big to post here on Stack Overflow):
/// <summary>
/// Word types recognised by the parser.
/// </summary>
public enum wordtypes
{
    noun,
    verb,
    adjective,
    adverb,
    unknown
}

/// <summary>
/// One parsed sense of a word: its type, text, synonyms and antonyms.
/// </summary>
public class definition
{
    public definition()
    {
        synonyms = new List<string>();
        anotnyms = new List<string>();
    }

    /// <summary>Word type this sense belongs to.</summary>
    public wordtypes wordtype { get; set; }

    /// <summary>Text of the sense as captured by the parser; null when the line did not match.</summary>
    public string definitiontext { get; set; }

    /// <summary>Synonyms from the "[syn: ...]" section; empty when absent.</summary>
    public List<string> synonyms { get; set; }

    /// <summary>Antonyms from the "[ant: ...]" section; empty when absent.</summary>
    // NOTE(review): "anotnyms" looks like a misspelling of "antonyms"; the name is
    // kept because renaming a public property would break existing callers.
    public List<string> anotnyms { get; set; }
}

/// <summary>
/// Parses the free-text body of a word definition into <see cref="definition"/> objects.
/// </summary>
static class definitionparser
{
    // One complete sense: optional type prefix, optional sense number, the text,
    // then optional "[syn: {...}, ...]" and "[ant: {...}, ...]" sections.
    private static readonly Regex EntryPattern = new Regex(
        @"^(?<matchtype>adv|adj|v|n){0,1}\s*(\d*): (?<definition>[\w\s;""',\.\(\)\!\-]+)(?<extrainfosyns>\[syn: ((?<wordsyn>\{[\w\s\-]+\})|(?:[,\ ]))*\]){0,1}\s*(?<extrainfoants>\[ant: ((?<wordant>\{[\w\s-]+\})|(?:[,\ ]))*\]){0,1}");

    // A line that opens a new word-type section; the sense number is optional.
    private static readonly Regex TypeHeaderPattern = new Regex("^(adj|v|n|adv){1}\\s\\d*");

    // A line that opens a new numbered sense inside the current section.
    private static readonly Regex SenseHeaderPattern = new Regex("^\\d+");

    /// <summary>
    /// Parses the definition text into one <see cref="definition"/> per sense.
    /// </summary>
    /// <param name="worddefinition">
    /// Free-text definition; the first non-empty line is skipped.
    /// </param>
    /// <returns>
    /// The parsed senses in input order; an empty list for null or empty input.
    /// A sense without its own type prefix takes the type of the closest
    /// preceding sense that had one, or <c>unknown</c> if there is none.
    /// </returns>
    public static List<definition> parse(string worddefinition)
    {
        var result = new List<definition>();
        if (string.IsNullOrEmpty(worddefinition))
        {
            return result;
        }

        var lines = worddefinition.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries)
                                  .Skip(1)
                                  .Select(x => x.Trim())
                                  .ToList();

        var entries = makelists(lines).SelectMany(x => x).ToList();

        // Only the first sense of a section carries the type prefix, so the most
        // recent one is carried forward rather than falling back to the enum default.
        wordtypes currentType = wordtypes.unknown;

        foreach (var entry in entries)
        {
            Match match = EntryPattern.Match(entry);

            Group typeGroup = match.Groups["matchtype"];
            if (typeGroup.Success)
            {
                currentType = definitiontypetoenum(typeGroup.Value);
            }

            var parsed = new definition { wordtype = currentType };

            Group textGroup = match.Groups["definition"];
            if (textGroup.Success)
            {
                parsed.definitiontext = textGroup.Value;
            }

            if (match.Groups["extrainfosyns"].Success && match.Groups["wordsyn"].Success)
            {
                foreach (Capture capture in match.Groups["wordsyn"].Captures)
                {
                    parsed.synonyms.Add(capture.Value.Trim('{', '}'));
                }
            }

            if (match.Groups["extrainfoants"].Success && match.Groups["wordant"].Success)
            {
                foreach (Capture capture in match.Groups["wordant"].Captures)
                {
                    parsed.anotnyms.Add(capture.Value.Trim('{', '}'));
                }
            }

            result.Add(parsed);
        }

        return result;
    }

    /// <summary>
    /// Groups the lines into one list per word-type section, where each element
    /// is one complete sense (wrapped lines are joined with a single space).
    /// </summary>
    /// <param name="text">Trimmed lines of the definition.</param>
    /// <returns>One list of sense strings per section, in input order.</returns>
    private static List<List<string>> makelists(IEnumerable<string> text)
    {
        var types = new List<List<string>>();
        List<string> current = null;

        foreach (var line in text)
        {
            bool startsSense = SenseHeaderPattern.IsMatch(line);

            if (TypeHeaderPattern.IsMatch(line) || (current == null && startsSense))
            {
                // New type (noun, verb, adj.), or a numbered sense that appears
                // before any type header: either way a new section starts here.
                current = new List<string> { line };
                types.Add(current);
            }
            else if (current == null)
            {
                // A continuation line with nothing to continue; skip it instead
                // of indexing the (still empty) list of sections.
                continue;
            }
            else if (startsSense)
            {
                // New sense in the current section.
                current.Add(line);
            }
            else
            {
                // Continuation of the most recent sense.
                current[current.Count - 1] += " " + line;
            }
        }

        return types;
    }

    /// <summary>
    /// Maps the textual type prefix to its <see cref="wordtypes"/> value.
    /// </summary>
    /// <param name="input">Type prefix: "adj", "adv", "n" or "v".</param>
    /// <returns>The matching value, or <c>unknown</c> for any other input.</returns>
    private static wordtypes definitiontypetoenum(string input)
    {
        switch (input)
        {
            case "adj": return wordtypes.adjective;
            case "adv": return wordtypes.adverb;
            case "n": return wordtypes.noun;
            case "v": return wordtypes.verb;
            default: return wordtypes.unknown;
        }
    }
}
notes:
- This should work as expected.
- Parsing free text is not reliable.
- You should import the service reference (as noted in the other answer) instead of parsing the XML manually.
Comments
Post a Comment