This project is read-only.
1
Vote

Thread problem in the Work method

description

Hi esben!
 
private void Work(Crawler crawler, PropertyBag propertyBag)
{
AllocConsole();

Console.Out.WriteLine();
Console.Out.WriteLine("Url: {0}", propertyBag.Step.Uri);
Console.Out.WriteLine("Content type: {0}", propertyBag.ContentType);
Console.Out.WriteLine("Content length: {0}", propertyBag.Text.IsNull() ? 0 : propertyBag.Text.Length);
Console.Out.WriteLine("Depth: {0}", propertyBag.Step.Depth);
Console.Out.WriteLine("ThreadId: {0}", Thread.CurrentThread.ManagedThreadId);
Console.Out.WriteLine("Thread Count: {0}", crawler.ThreadsInUse);
Console.Out.WriteLine();
 
ConsoleCount++;
if (ConsoleCount > 1000)
{
    Console.Clear();
    ConsoleCount = 0;
}
 
bool checkDomain = false;

string link = propertyBag.Step.Uri.ToString();
if (string.IsNullOrEmpty(link))
{
    link = propertyBag.ResponseUri.ToString();
}   
 
Uri uri = propertyBag.Step.Uri;
 
if (string.IsNullOrEmpty(Agency.ValidDomains))
    checkDomain = true;
 
    checkDomain = true;
 
if (Agency.ValidDomains != null && !checkDomain)
{
    string[] validDomains = Agency.ValidDomains.Split(';');
 
    foreach (var validDomain in validDomains)
    {
        string domain = validDomain.Trim();
        checkDomain = uri.Segments.Any(s => String.Equals(s.Replace("/", String.Empty), domain.Trim(), StringComparison.OrdinalIgnoreCase));
 
        if (checkDomain)
        {
            break;
        }
    }
}
 
if (checkDomain && propertyBag.GetResponse() != null)
{
    HtmlDocument htmlDoc = new HtmlDocument
    {
        OptionAddDebuggingAttributes = false,
        OptionAutoCloseOnEnd = true,
        OptionFixNestedTags = true,
        OptionReadEncoding = true
    };
 
    using (Stream reader = propertyBag.GetResponse())
    {
        reader.Seek(0, SeekOrigin.Begin);
        htmlDoc.Load(reader, true);
    }
 
    string htmlContent = htmlDoc.DocumentNode.OuterHtml;
    if (string.IsNullOrEmpty(htmlContent)) return;
 
    IAdvertisementsDao advertismentsDao = DaoFactory.GetAdvertisementsDao();
    List<TagValuePair> listTagValuePair = HtmlHelper.GetTagsAndValues(htmlContent);
 
MORE CODE...
 
 
This is my code in the Work method. It is quite large, but my problem is that the threads mix data between them. After a few hours of crawling, the data in the DB is not correct. I tried putting a lock around the whole method, but the result is the same. Do you know which part I must lock so that data from different threads will not be mixed when writing to the DB? Does propertyBag need to be under a lock?
 
regards
senza

comments