C# - LINQ join of entities from HashSets: Join vs Dictionary vs HashSet performance
I have HashSets that each store a type T. I have written a test application that compares the different relation algorithms I can think of, but I'm not pleased with the results I'm getting.
Are there more efficient ways of achieving object relations than the ones I am testing?
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Diagnostics;

namespace LinqTests
{
    /// <summary>
    /// Console benchmark that builds a set of users and a set of user properties,
    /// then times several strategies for finding the users whose "age" property is "29".
    /// </summary>
    class Program
    {
        /// <summary>
        /// Creates the dummy data, runs each search strategy once and prints a
        /// table with the number of matches and the elapsed milliseconds.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            HashSet<User> userTable = new HashSet<User>();
            HashSet<UserProperty> userPropertyTable = new HashSet<UserProperty>();

            #region lets create dummy data.
            Console.WriteLine("please wait while creating dummy data, can take while...");
            Console.WriteLine("");

            int rows = 1000000;

            // One generator for the whole run. Creating a new Random per iteration
            // can reuse the same time-based seed many times in a row (notably on
            // .NET Framework), which makes consecutive users get identical ages.
            Random rnd = new Random();

            for (int x = 0; x < rows; x++)
            {
                // Add new user.
                User user = new User(string.Format("joachim-{0}", x));
                if (userTable.Add(user))
                {
                    // Properties are only created for users that were actually added.
                    // A rejected Add (duplicate) is deliberately ignored, as before.
                    userPropertyTable.Add(new UserProperty(user, "age", rnd.Next(25, 30).ToString()));
                    userPropertyTable.Add(new UserProperty(user, "sex", "male"));
                    userPropertyTable.Add(new UserProperty(user, "location", "norway"));
                }
            }
            #endregion

            #region lets query tests.
            IEnumerable<User> users;
            Stopwatch stopwatch = new Stopwatch();
            int matches = 0;

            // Lets find users of age 29.
            Console.WriteLine("finding users of age 29");
            Console.WriteLine("");
            Console.WriteLine("---------------------------------------------------");
            Console.WriteLine("{0,-20} | {1,6} | {2,9}", "search strategy", "found", "time");
            Console.WriteLine("---------------------------------------------------");

            // Join test: join every user against every property, then filter.
            stopwatch.Start();
            users = (from user in userTable
                     join property in userPropertyTable on user.Id equals property.UserId
                     where property.Key == "age" && property.Value == "29"
                     select user);
            // Count() forces the deferred query to run inside the timed section.
            matches = users.Count();
            stopwatch.Stop();
            Console.WriteLine("{0,-20} | {1,6} | {2,6} ms.", "joining tables", matches, stopwatch.ElapsedMilliseconds);

            // Dictionary test: index the matching properties by user id, then probe.
            stopwatch.Restart();
            var dictionarySearch = (from t in userPropertyTable
                                    where t.Key == "age" && t.Value == "29"
                                    select t).ToDictionary(x => x.UserId);
            users = (from t in userTable
                     where dictionarySearch.ContainsKey(t.Id)
                     select t);
            matches = users.Count();
            stopwatch.Stop();
            Console.WriteLine("{0,-20} | {1,6} | {2,6} ms.", "dictionary contain", matches, stopwatch.ElapsedMilliseconds);

            // HashSet test: collect the matching user ids in a set, then probe.
            stopwatch.Restart();
            var hashsetSearch = new HashSet<Guid>(from t in userPropertyTable
                                                  where t.Key == "age" && t.Value == "29"
                                                  select t.UserId);
            users = (from t in userTable
                     where hashsetSearch.Contains(t.Id)
                     select t);
            matches = users.Count();
            stopwatch.Stop();
            Console.WriteLine("{0,-20} | {1,6} | {2,6} ms.", "hashset contain", matches, stopwatch.ElapsedMilliseconds);

            // The following takes too long, so we won't run them!
            // (Contains on an array/list is a linear scan for every user.)

            //// Array test.
            //stopwatch.Restart();
            //var arrayMatch = (from t in userPropertyTable
            //                  where t.Key == "age" && t.Value == "29"
            //                  select t.UserId).ToArray();
            //users = (from t in userTable
            //         where arrayMatch.Contains(t.Id)
            //         select t);
            //matches = users.Count();
            //stopwatch.Stop();
            //Console.WriteLine("{0,-20} | {1,6} | {2,6} ms.", "array contain", matches, stopwatch.ElapsedMilliseconds);

            //// List test.
            //stopwatch.Restart();
            //var listMatch = (from t in userPropertyTable
            //                 where t.Key == "age" && t.Value == "29"
            //                 select t.UserId).ToList();
            //users = (from t in userTable
            //         where listMatch.Contains(t.Id)
            //         select t);
            //matches = users.Count();
            //stopwatch.Stop();
            //Console.WriteLine("{0,-20} | {1,6} | {2,6} ms.", "list contain", matches, stopwatch.ElapsedMilliseconds);

            Console.WriteLine("---------------------------------------------------");
            #endregion

            Console.WriteLine("");
            Console.WriteLine("hit return exit...");
            Console.Read();
        }
    }

    /// <summary>
    /// A user identified by a generated <see cref="Guid"/>; equality and hashing use <see cref="Id"/> only.
    /// </summary>
    public class User
    {
        /// <summary>Creates a user with a fresh <see cref="Id"/>.</summary>
        /// <param name="username">Display name stored in <see cref="Username"/>.</param>
        public User(string username)
        {
            this.Id = Guid.NewGuid();
            this.Username = username;
        }

        public Guid Id { get; set; }
        public string Username { get; set; }

        /// <summary>Two users are equal when their <see cref="Id"/> values are equal.</summary>
        public override bool Equals(object obj)
        {
            User other = obj as User;
            if (other == null)
                return false;

            return this.Id == other.Id;
        }

        /// <summary>Hash code derived from <see cref="Id"/>, consistent with <see cref="Equals(object)"/>.</summary>
        public override int GetHashCode()
        {
            return Id.GetHashCode();
        }
    }

    /// <summary>
    /// A key/value property attached to a user. Equality and hashing use the
    /// (<see cref="UserId"/>, <see cref="Key"/>) pair, so a set holds at most one
    /// property per key for each user.
    /// </summary>
    public class UserProperty
    {
        /// <summary>Creates a property for <paramref name="user"/> with a fresh <see cref="Id"/>.</summary>
        /// <param name="user">Owner; its <see cref="User.Id"/> is copied into <see cref="UserId"/>.</param>
        /// <param name="key">Property name, e.g. "age".</param>
        /// <param name="value">Property value as text.</param>
        public UserProperty(User user, string key, string value)
        {
            this.Id = Guid.NewGuid();
            this.UserId = user.Id;
            this.Key = key;
            this.Value = value;
        }

        public Guid Id { get; private set; }
        public Guid UserId { get; private set; }

        // NOTE(review): Key takes part in Equals/GetHashCode but has a public setter;
        // changing it while the instance is stored in a HashSet/Dictionary breaks lookups.
        public string Key { get; set; }
        public string Value { get; set; }

        /// <summary>Two properties are equal when both <see cref="UserId"/> and <see cref="Key"/> match.</summary>
        public override bool Equals(object obj)
        {
            UserProperty other = obj as UserProperty;
            if (other == null)
                return false;

            return this.UserId == other.UserId && this.Key == other.Key;
        }

        /// <summary>
        /// Combines the hash codes of <see cref="UserId"/> and <see cref="Key"/>.
        /// This avoids formatting a temporary string on every call, which matters
        /// because the benchmark hashes millions of instances.
        /// </summary>
        public override int GetHashCode()
        {
            unchecked
            {
                int keyHash = this.Key != null ? this.Key.GetHashCode() : 0;
                return (this.UserId.GetHashCode() * 397) ^ keyHash;
            }
        }
    }
}
This should make the LINQ join comparable to the other methods:
// Filter the properties first, so the join's inner sequence contains only the
// "age == 29" rows instead of every property of every user.
var properties = userPropertyTable
    .Where(p => p.Key == "age" && p.Value == "29")
    .ToArray();

users = (from user in userTable
         join property in properties on user.Id equals property.UserId
         select user);
Here is the fastest (~2x) I could achieve:
// Collect the ids of all users whose "age" property is "29" into a set,
// then keep the users whose id is in that set.
var filteredUserIds = new HashSet<Guid>(
    userPropertyTable
        .Where(p => p.Key == "age" && p.Value == "29")
        .Select(p => p.UserId));

users = (from user in userTable
         where filteredUserIds.Contains(user.Id)
         select user);
With output:
---------------------------------------------------
search strategy      |  found |      time
---------------------------------------------------
method               | 210366 |    157 ms.
dictionary contain   | 210366 |    325 ms.
hashset contain      | 210366 |    325 ms.
---------------------------------------------------
Comments
Post a Comment